#install.packages('TDAmapper')
library(TDAmapper)
library(cluster)
library(vip)
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
#install.packages('kernlab')
library(kernlab)
#install.packages('class')
library(class)
#install.packages('nnet')
library(nnet)
#install.packages('randomForest')
library(randomForest)
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
#install.packages('e1071')
library(e1071)
#install.packages("BayesFactor")
library(BayesFactor)
## Loading required package: coda
##
## Attaching package: 'coda'
## The following object is masked from 'package:kernlab':
##
## nvar
## Loading required package: Matrix
## ************
## Welcome to BayesFactor 0.9.12-4.5. If you have questions, please contact Richard Morey (richarddmorey@gmail.com).
##
## Type BFManual() to open the manual.
## ************
library(BayesPPD)
library(bayestestR)
#install.packages('igraph')
library('igraph')
## Warning: package 'igraph' was built under R version 4.3.3
##
## Attaching package: 'igraph'
## The following object is masked from 'package:BayesFactor':
##
## compare
## The following object is masked from 'package:class':
##
## knn
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
#install.packages('locfit')
library(locfit)
## locfit 1.5-9.8 2023-06-11
#install.packages('ggplot2')
library(ggplot2)
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## The following object is masked from 'package:kernlab':
##
## alpha
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:igraph':
##
## as_data_frame, groups, union
## The following object is masked from 'package:randomForest':
##
## combine
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages('networkD3')
library(networkD3)
library(rstanarm)
## Loading required package: Rcpp
## This is rstanarm version 2.26.1
## - See https://mc-stan.org/rstanarm/articles/priors for changes to default priors!
## - Default priors may change, so it's safest to specify priors, even if equivalent to the defaults.
## - For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores())
library(see)
#install.packages('tidyverse')
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ lubridate::%--%() masks igraph::%--%()
## ✖ ggplot2::alpha() masks kernlab::alpha()
## ✖ tibble::as_data_frame() masks dplyr::as_data_frame(), igraph::as_data_frame()
## ✖ dplyr::combine() masks randomForest::combine()
## ✖ purrr::compose() masks igraph::compose()
## ✖ purrr::cross() masks kernlab::cross()
## ✖ tidyr::crossing() masks igraph::crossing()
## ✖ tidyr::expand() masks Matrix::expand()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ ggplot2::margin() masks randomForest::margin()
## ✖ purrr::none() masks locfit::none()
## ✖ tidyr::pack() masks Matrix::pack()
## ✖ purrr::simplify() masks igraph::simplify()
## ✖ tidyr::unpack() masks Matrix::unpack()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages('caret')
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
##
## The following objects are masked from 'package:rstanarm':
##
## compare_models, R2
#install.packages('ISLR')
library(ISLR)
#install.packages('MCMCpack')
library(MCMCpack)
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2025 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
#install.packages("caret")
library(caret)
library(TDA)
##
## Attaching package: 'TDA'
##
## The following object is masked from 'package:cluster':
##
## silhouette
library(TDAstats)
library(ks)
##
## Attaching package: 'ks'
##
## The following object is masked from 'package:TDA':
##
## kde
##
## The following object is masked from 'package:MCMCpack':
##
## vech
##
## The following object is masked from 'package:igraph':
##
## compare
##
## The following object is masked from 'package:BayesFactor':
##
## compare
#install.packages('MLmetrics')
library(MLmetrics)
##
## Attaching package: 'MLmetrics'
##
## The following objects are masked from 'package:caret':
##
## MAE, RMSE
##
## The following object is masked from 'package:base':
##
## Recall
#install.packages('googledrive')
library(googledrive)
#install.packages('stringr')
library(stringr)
#install.packages('ks')
library(ks)
library(GGally)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Import the UCI Adult dataset from a local copy. The file is read with
# header = FALSE, so read.csv() names the columns V1..V15.
# NOTE(review): the path is machine-specific; adjust it before running elsewhere.
adult <- read.csv(
  "~/Desktop/NCU/DissertationDatasets/Adult/adult.data",
  header = FALSE
)
# str() prints the structure itself and returns NULL invisibly, so wrapping it
# in head() only added a stray "NULL" to the output.
str(adult)
## 'data.frame': 32561 obs. of 15 variables:
## $ V1 : int 39 50 38 53 28 37 49 52 31 42 ...
## $ V2 : chr " State-gov" " Self-emp-not-inc" " Private" " Private" ...
## $ V3 : int 77516 83311 215646 234721 338409 284582 160187 209642 45781 159449 ...
## $ V4 : chr " Bachelors" " Bachelors" " HS-grad" " 11th" ...
## $ V5 : int 13 13 9 7 13 14 5 9 14 13 ...
## $ V6 : chr " Never-married" " Married-civ-spouse" " Divorced" " Married-civ-spouse" ...
## $ V7 : chr " Adm-clerical" " Exec-managerial" " Handlers-cleaners" " Handlers-cleaners" ...
## $ V8 : chr " Not-in-family" " Husband" " Not-in-family" " Husband" ...
## $ V9 : chr " White" " White" " White" " Black" ...
## $ V10: chr " Male" " Male" " Male" " Male" ...
## $ V11: int 2174 0 0 0 0 0 0 0 14084 5178 ...
## $ V12: int 0 0 0 0 0 0 0 0 0 0 ...
## $ V13: int 40 13 40 40 40 40 16 45 50 40 ...
## $ V14: chr " United-States" " United-States" " United-States" " United-States" ...
## $ V15: chr " <=50K" " <=50K" " <=50K" " <=50K" ...
## NULL
# Per-column summary of the raw data: quantiles and mean for the integer
# columns, length/class/mode for the character columns.
summary(adult)
## V1 V2 V3 V4
## Min. :17.00 Length:32561 Min. : 12285 Length:32561
## 1st Qu.:28.00 Class :character 1st Qu.: 117827 Class :character
## Median :37.00 Mode :character Median : 178356 Mode :character
## Mean :38.58 Mean : 189778
## 3rd Qu.:48.00 3rd Qu.: 237051
## Max. :90.00 Max. :1484705
## V5 V6 V7 V8
## Min. : 1.00 Length:32561 Length:32561 Length:32561
## 1st Qu.: 9.00 Class :character Class :character Class :character
## Median :10.00 Mode :character Mode :character Mode :character
## Mean :10.08
## 3rd Qu.:12.00
## Max. :16.00
## V9 V10 V11 V12
## Length:32561 Length:32561 Min. : 0 Min. : 0.0
## Class :character Class :character 1st Qu.: 0 1st Qu.: 0.0
## Mode :character Mode :character Median : 0 Median : 0.0
## Mean : 1078 Mean : 87.3
## 3rd Qu.: 0 3rd Qu.: 0.0
## Max. :99999 Max. :4356.0
## V13 V14 V15
## Min. : 1.00 Length:32561 Length:32561
## 1st Qu.:40.00 Class :character Class :character
## Median :40.00 Mode :character Mode :character
## Mean :40.44
## 3rd Qu.:45.00
## Max. :99.00
# Pairwise plot matrix of the six integer columns (V1, V3, V5, V11, V12, V13)
# together with the character column V15.
ggpairs(adult[,c(1,3,5,11,12,13,15)])
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

##Add Bayesian tests functions
#' Sign test with a region of practical equivalence (ROPE).
#'
#' A pseudo-observation at 0 is prepended to the differences, then the function
#' reports the fraction of observations that fall to the left of the ROPE,
#' inside it, and to the right of it. No posterior sampling is performed; the
#' result is deterministic.
#'
#' @param diffVector Numeric vector of paired differences between two methods.
#' @param rope_min Lower bound of the ROPE.
#' @param rope_max Upper bound of the ROPE (must not be smaller than rope_min).
#' @return A list with elements `probLeft`, `probRope` and `probRight`.
BayesianSignTest <- function(diffVector, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  # Add the fake first observation at 0.
  diffVector <- c(0, diffVector)
  probLeft <- mean(diffVector < rope_min)
  probRight <- mean(diffVector > rope_max)
  # The ROPE bounds are inclusive: a difference lying exactly on rope_min or
  # rope_max counts as practically equivalent. With strict inequalities such
  # values were counted nowhere and the three probabilities did not sum to 1.
  probRope <- mean(diffVector >= rope_min & diffVector <= rope_max)
  list(probLeft = probLeft, probRope = probRope, probRight = probRight)
}
#' Bayesian signed-rank test with a region of practical equivalence (ROPE).
#'
#' Draws `samples` Dirichlet weight vectors over the observations plus one
#' pseudo-observation at 0 (prior weight 0.5, weight 1 for every real
#' observation). For each draw, every ordered pair (i, j) contributes
#' weight[i] * weight[j] to the region containing
#' diffVector[i] + diffVector[j]:
#'   right if the sum is >  2 * rope_max,
#'   rope  if it is      >  2 * rope_min (and not right),
#'   left  otherwise.
#' The region with the largest total wins the draw; ties split the win equally.
#'
#' Uses rdirichlet(), which this script gets from MCMCpack (attached at the
#' top of the file).
#'
#' @param diffVector Numeric vector of paired differences (no missing values).
#' @param rope_min Lower bound of the ROPE.
#' @param rope_max Upper bound of the ROPE (must not be smaller than rope_min).
#' @param samples Number of Dirichlet draws; defaults to the previously
#'   hard-coded 30000.
#' @return A list with elements `winLeft`, `winRope` and `winRight`: the
#'   fraction of draws won by each region.
BayesianSignedRank <- function(diffVector, rope_min, rope_max, samples = 30000) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  if (anyNA(diffVector)) {
    stop("diffVector must not contain missing values")
  }
  # Prior weights: 0.5 for the pseudo-observation, 1 for each real observation.
  weights <- c(0.5, rep(1, length(diffVector)))
  # Add the fake first observation at 0.
  diffVector <- c(0, diffVector)

  # Which region each ordered pair falls into does not depend on the sampled
  # weights, so it is classified once here instead of once per draw.
  pair_sums <- outer(diffVector, diffVector, "+")
  in_right <- pair_sums > 2 * rope_max
  in_rope <- !in_right & pair_sums > 2 * rope_min
  in_left <- !(in_right | in_rope)

  # One row per draw, one column per observation.
  sampledWeights <- rdirichlet(samples, weights)

  # For each draw w: sum over (i, j) of w[i] * w[j] * mask[i, j], computed for
  # all draws at once as the row sums of (W %*% mask) * W.
  region_mass <- function(mask) {
    rowSums((sampledWeights %*% (mask * 1)) * sampledWeights)
  }
  mass_right <- region_mass(in_right)
  mass_rope <- region_mass(in_rope)
  mass_left <- region_mass(in_left)

  best <- pmax(mass_right, mass_rope, mass_left)
  right_wins <- mass_right == best
  rope_wins <- mass_rope == best
  left_wins <- mass_left == best
  # Number of regions sharing the maximum in each draw (always at least 1).
  winners <- right_wins + rope_wins + left_wins

  list(
    winLeft = mean(left_wins / winners),
    winRope = mean(rope_wins / winners),
    winRight = mean(right_wins / winners)
  )
}
#' Bayesian correlated t-test.
#'
#' diff_a_b is a vector of differences between the two classifiers, on each
#' fold of cross-validation. If you have done 10 runs of 10-fold
#' cross-validation, you have 100 results for each classifier. Both classifiers
#' should have been cross-validated on the same folds; diff_a_b is then the
#' fold-by-fold difference.
#'
#' @param diff_a_b Numeric vector of fold-by-fold differences (at least two).
#' @param rho Correlation of the cross-validation results:
#'   1 / (number of folds). Must be smaller than 1.
#' @param rope_min Lower bound of the ROPE.
#' @param rope_max Upper bound of the ROPE (must not be smaller than rope_min).
#' @return A list with elements `left`, `rope` and `right`: the probability
#'   mass of the Student-t distribution (location mean(diff_a_b), n - 1 degrees
#'   of freedom) below, inside and above the ROPE.
correlatedBayesianTtest <- function(diff_a_b, rho, rope_min, rope_max) {
  if (rope_max < rope_min) {
    stop("rope_max should be larger than rope_min")
  }
  n <- length(diff_a_b)
  if (n < 2) {
    stop("diff_a_b should contain at least two differences")
  }
  # Variance correction for the correlation between folds. rho >= 1 makes it
  # infinite or negative, which previously produced meaningless or NaN results.
  var_factor <- 1 / n + rho / (1 - rho)
  if (!is.finite(var_factor) || var_factor <= 0) {
    stop("rho should be smaller than 1 (typically 1 / number of folds)")
  }
  delta <- mean(diff_a_b)
  df <- n - 1
  sp <- sd(diff_a_b) * sqrt(var_factor)

  if (!is.na(sp) && sp == 0) {
    # All differences are identical: the distribution collapses to a point mass
    # at delta. Dividing by sp would give NaN when delta sits exactly on a ROPE
    # bound, so the point mass is assigned directly (bounds count as inside).
    p.left <- as.numeric(delta < rope_min)
    p.right <- as.numeric(delta > rope_max)
    return(list("left" = p.left, "rope" = 1 - p.left - p.right, "right" = p.right))
  }

  p.left <- pt((rope_min - delta) / sp, df)
  p.rope <- pt((rope_max - delta) / sp, df) - p.left
  list("left" = p.left, "rope" = p.rope, "right" = 1 - p.left - p.rope)
}
# Fix the RNG seed so the random steps below (e.g. row sampling) are repeatable.
set.seed(16974)
### Prepare datasets: one-hot encode where necessary, then compute the
### persistent homology of each dataset.
## One-hot encoding for the adult dataset
library(caret)
# Build the encoder over every column of `adult`. The formula " ~ ." includes
# the income column V15, so the encoded frame also carries V15 indicator columns.
dummy.adult <- dummyVars(" ~ .", data = adult)
# Apply the encoder to the data frame.
adult.one_hot_df <- data.frame(predict(dummy.adult, newdata = adult))
# Inspect the encoded frame. str() prints on its own and returns NULL, so the
# former head() wrapper only printed a stray "NULL".
str(adult.one_hot_df)
## 'data.frame': 32561 obs. of 110 variables:
## $ V1 : num 39 50 38 53 28 37 49 52 31 42 ...
## $ V2.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 0 0 1 1 1 1 1 0 1 1 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 1 0 0 0 0 0 1 0 0 ...
## $ V2.State.gov : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 77516 83311 215646 234721 338409 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 1 0 0 0 0 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 1 1 0 0 1 0 0 0 0 1 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 1 0 0 0 0 1 0 0 ...
## $ V4.Masters : num 0 0 0 0 0 1 0 0 1 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V5 : num 13 13 9 7 13 14 5 9 14 13 ...
## $ V6.Divorced : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 0 1 0 1 1 1 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V6.Never.married : num 1 0 0 0 0 0 0 0 1 0 ...
## $ V6.Separated : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Adm.clerical : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 1 0 0 0 1 0 1 0 1 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 0 1 0 1 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 1 0 1 0 0 0 1 0 1 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Unmarried : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 1 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 1 1 0 1 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 0 0 1 0 1 1 1 ...
## $ V10.Female : num 0 0 0 0 1 1 1 0 1 0 ...
## $ V10.Male : num 1 1 1 1 0 0 0 1 0 1 ...
## $ V11 : num 2174 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 13 40 40 40 40 16 45 50 40 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 1 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Raw income label (column V15 of the original data).
adult_df1 <- adult[[15]]
# Append the raw label to the one-hot frame as an extra column named "adult_df1".
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Keep the six numeric features: V1, V3, V5, V11, V12, V13.
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66)]
## Persistent homology of the adult dataset
# Draw 1000 rows at random (without replacement) from the one-hot data, to
# check whether a barcode and persistence diagram can be resolved at this size.
adult.one_hot_1000_df <- adult.one_hot_df[
  sample(nrow(adult.one_hot_df), size = 1000, replace = FALSE),
]
# str() prints on its own and returns NULL; the former head() wrapper only
# printed a stray "NULL".
str(adult.one_hot_1000_df)
## 'data.frame': 1000 obs. of 110 variables:
## $ V1 : num 33 25 39 21 32 26 20 58 24 63 ...
## $ V2.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V2.Federal.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Local.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Never.worked : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Private : num 1 1 1 1 1 1 0 0 1 0 ...
## $ V2.Self.emp.inc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Self.emp.not.inc : num 0 0 0 0 0 0 0 0 0 1 ...
## $ V2.State.gov : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V2.Without.pay : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V3 : num 176992 105693 234901 198050 134886 ...
## $ V4.10th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.11th : num 0 0 0 0 0 0 0 1 0 0 ...
## $ V4.12th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.1st.4th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.5th.6th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.7th.8th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.9th : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Assoc.acdm : num 0 0 1 1 0 0 0 0 0 0 ...
## $ V4.Assoc.voc : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Bachelors : num 0 1 0 0 0 1 0 0 0 0 ...
## $ V4.Doctorate : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.HS.grad : num 0 0 0 0 1 0 0 0 1 0 ...
## $ V4.Masters : num 1 0 0 0 0 0 0 0 0 0 ...
## $ V4.Preschool : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Prof.school : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V4.Some.college : num 0 0 0 0 0 0 1 0 0 1 ...
## $ V5 : num 14 13 12 12 9 13 10 7 9 10 ...
## $ V6.Divorced : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.AF.spouse : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Married.civ.spouse : num 1 0 0 0 1 0 0 1 0 1 ...
## $ V6.Married.spouse.absent : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V6.Never.married : num 0 1 0 1 0 1 1 0 1 0 ...
## $ V6.Separated : num 0 0 1 0 0 0 0 0 0 0 ...
## $ V6.Widowed : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.. : num 0 0 0 0 0 0 1 1 0 0 ...
## $ V7.Adm.clerical : num 0 0 1 1 1 0 0 0 0 0 ...
## $ V7.Armed.Forces : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Craft.repair : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Exec.managerial : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Farming.fishing : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Handlers.cleaners : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Machine.op.inspct : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Other.service : num 0 0 0 0 0 0 0 0 1 1 ...
## $ V7.Priv.house.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Prof.specialty : num 1 1 0 0 0 1 0 0 0 0 ...
## $ V7.Protective.serv : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Sales : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Tech.support : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V7.Transport.moving : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Husband : num 1 0 0 0 0 0 0 1 0 1 ...
## $ V8.Not.in.family : num 0 1 0 1 0 1 0 0 0 0 ...
## $ V8.Other.relative : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V8.Own.child : num 0 0 0 0 0 0 0 0 1 0 ...
## $ V8.Unmarried : num 0 0 1 0 0 0 1 0 0 0 ...
## $ V8.Wife : num 0 0 0 0 1 0 0 0 0 0 ...
## $ V9.Amer.Indian.Eskimo : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Asian.Pac.Islander : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Black : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.Other : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V9.White : num 1 1 1 1 1 1 1 1 1 1 ...
## $ V10.Female : num 0 1 0 1 1 1 1 0 1 0 ...
## $ V10.Male : num 1 0 1 0 0 0 0 1 0 1 ...
## $ V11 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V12 : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V13 : num 40 40 40 25 40 40 20 16 25 48 ...
## $ V14.. : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cambodia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Canada : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.China : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Columbia : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Cuba : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Dominican.Republic : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ecuador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.El.Salvador : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.England : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.France : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Germany : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Greece : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Guatemala : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Haiti : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Holand.Netherlands : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Honduras : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hong : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Hungary : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.India : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Iran : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Ireland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Italy : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Jamaica : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Japan : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Laos : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Mexico : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Nicaragua : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Outlying.US.Guam.USVI.etc.: num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Peru : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Philippines : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Poland : num 0 0 0 0 0 0 0 0 0 0 ...
## $ V14.Portugal : num 0 0 0 0 0 0 0 0 0 0 ...
## [list output truncated]
## NULL
# Per-column summary of the 1000-row sample; indicator columns whose maximum is
# 0 correspond to categories that do not occur in the sample.
summary(adult.one_hot_1000_df)
## V1 V2.. V2.Federal.gov V2.Local.gov
## Min. :17.00 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:28.00 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :37.00 Median :0.000 Median :0.000 Median :0.000
## Mean :38.64 Mean :0.077 Mean :0.025 Mean :0.064
## 3rd Qu.:47.00 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :90.00 Max. :1.000 Max. :1.000 Max. :1.000
## V2.Never.worked V2.Private V2.Self.emp.inc V2.Self.emp.not.inc
## Min. :0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :1.000 Median :0.000 Median :0.000
## Mean :0 Mean :0.679 Mean :0.037 Mean :0.079
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000 Max. :1.000
## V2.State.gov V2.Without.pay V3 V4.10th
## Min. :0.000 Min. :0 Min. : 19302 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:123797 1st Qu.:0.000
## Median :0.000 Median :0 Median :181982 Median :0.000
## Mean :0.039 Mean :0 Mean :195583 Mean :0.041
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:242529 3rd Qu.:0.000
## Max. :1.000 Max. :0 Max. :721161 Max. :1.000
## V4.11th V4.12th V4.1st.4th V4.5th.6th
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.032 Mean :0.015 Mean :0.005 Mean :0.015
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V4.7th.8th V4.9th V4.Assoc.acdm V4.Assoc.voc V4.Bachelors
## Min. :0.000 Min. :0.000 Min. :0.00 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.00 Median :0.000 Median :0.000
## Mean :0.015 Mean :0.018 Mean :0.04 Mean :0.052 Mean :0.155
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.00 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.00 Max. :1.000 Max. :1.000
## V4.Doctorate V4.HS.grad V4.Masters V4.Preschool
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.014 Mean :0.327 Mean :0.053 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V4.Prof.school V4.Some.college V5 V6.Divorced
## Min. :0.000 Min. :0.000 Min. : 1 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 9 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :10 Median :0.000
## Mean :0.014 Mean :0.202 Mean :10 Mean :0.132
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:12 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :16 Max. :1.000
## V6.Married.AF.spouse V6.Married.civ.spouse V6.Married.spouse.absent
## Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0.464 Mean :0.005
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000
## V6.Never.married V6.Separated V6.Widowed V7..
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.325 Mean :0.041 Mean :0.033 Mean :0.077
## 3rd Qu.:1.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Adm.clerical V7.Armed.Forces V7.Craft.repair V7.Exec.managerial
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.117 Mean :0.001 Mean :0.129 Mean :0.124
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Farming.fishing V7.Handlers.cleaners V7.Machine.op.inspct V7.Other.service
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.029 Mean :0.041 Mean :0.071 Mean :0.091
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Priv.house.serv V7.Prof.specialty V7.Protective.serv V7.Sales
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.003 Mean :0.119 Mean :0.018 Mean :0.102
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V7.Tech.support V7.Transport.moving V8.Husband V8.Not.in.family
## Min. :0.000 Min. :0.000 Min. :0.00 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.00 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.00 Median :0.000
## Mean :0.035 Mean :0.043 Mean :0.41 Mean :0.261
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:1.00 3rd Qu.:1.000
## Max. :1.000 Max. :1.000 Max. :1.00 Max. :1.000
## V8.Other.relative V8.Own.child V8.Unmarried V8.Wife
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.027 Mean :0.136 Mean :0.115 Mean :0.051
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V9.Amer.Indian.Eskimo V9.Asian.Pac.Islander V9.Black V9.Other
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.014 Mean :0.029 Mean :0.104 Mean :0.007
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V9.White V10.Female V10.Male V11
## Min. :0.000 Min. :0.000 Min. :0.000 Min. : 0.0
## 1st Qu.:1.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.: 0.0
## Median :1.000 Median :0.000 Median :1.000 Median : 0.0
## Mean :0.846 Mean :0.339 Mean :0.661 Mean : 868.9
## 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.:1.000 3rd Qu.: 0.0
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :99999.0
## V12 V13 V14.. V14.Cambodia V14.Canada
## Min. : 0.00 Min. : 1.0 Min. :0.000 Min. :0 Min. :0.000
## 1st Qu.: 0.00 1st Qu.:40.0 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000
## Median : 0.00 Median :40.0 Median :0.000 Median :0 Median :0.000
## Mean : 92.56 Mean :40.5 Mean :0.024 Mean :0 Mean :0.003
## 3rd Qu.: 0.00 3rd Qu.:45.0 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000
## Max. :2457.00 Max. :99.0 Max. :1.000 Max. :0 Max. :1.000
## V14.China V14.Columbia V14.Cuba V14.Dominican.Republic
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.003 Mean :0.002 Mean :0.005 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Ecuador V14.El.Salvador V14.England V14.France
## Min. :0.000 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0.000 Median :0.000
## Mean :0.001 Mean :0.003 Mean :0.003 Mean :0.001
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Germany V14.Greece V14.Guatemala V14.Haiti
## Min. :0.000 Min. :0.000 Min. :0 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0 Median :0.000
## Mean :0.002 Mean :0.002 Mean :0 Mean :0.002
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :0 Max. :1.000
## V14.Holand.Netherlands V14.Honduras V14.Hong V14.Hungary
## Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0 Mean :0.001 Mean :0.002
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.India V14.Iran V14.Ireland V14.Italy V14.Jamaica
## Min. :0.000 Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0.004 Mean :0 Mean :0 Mean :0.003 Mean :0.003
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.Japan V14.Laos V14.Mexico V14.Nicaragua
## Min. :0.000 Min. :0 Min. :0.000 Min. :0
## 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000 1st Qu.:0
## Median :0.000 Median :0 Median :0.000 Median :0
## Mean :0.003 Mean :0 Mean :0.022 Mean :0
## 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0
## Max. :1.000 Max. :0 Max. :1.000 Max. :0
## V14.Outlying.US.Guam.USVI.etc. V14.Peru V14.Philippines V14.Poland
## Min. :0 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0 Median :0 Median :0.000 Median :0.000
## Mean :0 Mean :0 Mean :0.004 Mean :0.002
## 3rd Qu.:0 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :0 Max. :0 Max. :1.000 Max. :1.000
## V14.Portugal V14.Puerto.Rico V14.Scotland V14.South V14.Taiwan
## Min. :0.000 Min. :0.000 Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0.000 1st Qu.:0.000 1st Qu.:0 1st Qu.:0.000 1st Qu.:0.000
## Median :0.000 Median :0.000 Median :0 Median :0.000 Median :0.000
## Mean :0.001 Mean :0.004 Mean :0 Mean :0.001 Mean :0.001
## 3rd Qu.:0.000 3rd Qu.:0.000 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:0.000
## Max. :1.000 Max. :1.000 Max. :0 Max. :1.000 Max. :1.000
## V14.Thailand V14.Trinadad.Tobago V14.United.States V14.Vietnam
## Min. :0 Min. :0.000 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:0.000 1st Qu.:1.000 1st Qu.:0.000
## Median :0 Median :0.000 Median :1.000 Median :0.000
## Mean :0 Mean :0.002 Mean :0.891 Mean :0.003
## 3rd Qu.:0 3rd Qu.:0.000 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000 Max. :1.000
## V14.Yugoslavia V15...50K V15..50K
## Min. :0 Min. :0.000 Min. :0.000
## 1st Qu.:0 1st Qu.:1.000 1st Qu.:0.000
## Median :0 Median :1.000 Median :0.000
## Mean :0 Mean :0.769 Mean :0.231
## 3rd Qu.:0 3rd Qu.:1.000 3rd Qu.:0.000
## Max. :0 Max. :1.000 Max. :1.000
# Compute the persistent homology of the 1000-row sample with default arguments.
# NOTE(review): all 110 columns go in unscaled, including the two V15 (income)
# indicator columns and the large-valued V3 — confirm this is intended.
phom.adult.one_hot_1000_df <- calculate_homology(adult.one_hot_1000_df)
# Plot the barcode of the computed homology.
plot_barcode(phom.adult.one_hot_1000_df)

# Plot the persistence diagram of the same homology.
plot_persist(phom.adult.one_hot_1000_df)

#####———————————————MAPPER ALGORITHM————————————————
# Prepare the Adult dataset for the Mapper 1D algorithm.
# adult_df1 is column 15 of the raw table; it is used further down as the
# classification response (as.factor(adult_df1)).
adult_df1 <- adult[, 15]
# Append the raw response as the last column of the one-hot table.
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
# Column subsets selected by position.
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62, 63, 64, 65, 66)]
# Modelling table: everything except columns 109 and 110.
# NOTE(review): these are presumably the one-hot V15 income dummies, dropped so
# the response is only present as adult_df1 -- confirm against the column order.
adult.one_hot_df4 <- adult.one_hot_df1[, -c(109, 110)]

## Two filter functions: PCA & KDE
# Linear PCA filter: centre and scale every column of the full one-hot table,
# then project the same rows onto the principal components.
# `scale.` is spelled out in full; the previous `scale = TRUE` only worked
# through partial argument matching.
# NOTE(review): adult.one_hot_df appears to still contain the V15 income dummies,
# so the filter may see the label -- confirm whether that is intended.
b <- prcomp(adult.one_hot_df, center = TRUE, scale. = TRUE)
ts_pca_b <- as.data.frame(predict(b, adult.one_hot_df))
# Kernel density filter: density estimated on the first 4 columns of
# adult.one_hot_df3 with an identity bandwidth matrix, evaluated at the
# observations themselves.
filter.kde <- kde(
  adult.one_hot_df3[, 1:4],
  H = diag(1, nrow = 4),
  eval.points = adult.one_hot_df3[, 1:4]
)$estimate
## *** Adult Mapper (PCA filter): 5 intervals, 40% overlap, 5 bins
# Run 1D Mapper on the pairwise distances of the one-hot table, using the first
# principal component score as the filter value of each observation.
m_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(ts_pca_b$PC1),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5)
# Build an undirected igraph object from the Mapper adjacency matrix.
# graph_from_adjacency_matrix() / layout_nicely() replace graph.adjacency() /
# layout.auto(), which were deprecated in igraph 2.0.0.
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency,
                                              mode = "undirected")
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5))

# Inspect the structure of the PCA-filter Mapper result.
# str() prints the structure itself and returns NULL invisibly, so it is called
# directly; wrapping it in head() only added a stray "NULL" to the output.
str(m_adult_5.40.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_adult_5.40.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
# points_in_vertex holds, per vertex, the row indices of its member observations.
str(m_adult_5.40.5$points_in_vertex)
## List of 5
## $ : int [1:3373] 8 10 12 21 26 46 64 69 73 95 ...
## $ : int [1:10276] 2 8 10 11 12 15 21 24 26 28 ...
## $ : int [1:11563] 1 2 4 6 9 16 19 20 23 24 ...
## $ : int [1:14818] 1 3 4 5 6 9 13 14 16 17 ...
## $ : int [1:12081] 7 13 14 18 22 25 27 32 36 37 ...
# Coloured plot of the PCA-filter Mapper graph: vertex colour encodes the level
# of the vertex, vertex size encodes how many observations it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
# Normalise the level index of each vertex by the largest level.
my_max <- max(m_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_adult_5.40.5$level_of_vertex / my_max
# Bin the normalised levels into `my_resolution` intervals and use the bin
# number to pick a colour from the palette.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]
# graph_from_adjacency_matrix() / layout_nicely() replace the igraph functions
# graph.adjacency() / layout.auto(), deprecated in igraph 2.0.0.
g_adult_5.40.5 <- graph_from_adjacency_matrix(m_adult_5.40.5$adjacency,
                                              mode = "undirected")
# Number of observations per vertex.
vertex_size <- lengths(m_adult_5.40.5$points_in_vertex)
# Log-scaled vertex size; the largest vertex is drawn at size 30.
plot(g_adult_5.40.5, layout = layout_nicely(g_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) /
       max(log(vertex_size)),
     vertex.color = my_colors)

# Membership of each PCA-filter Mapper vertex.
# Step 1: keep each vertex's membership as a one-element list (single-bracket
# subsetting), one object per vertex.
m_adult_5.40.5.n1 <- m_adult_5.40.5$points_in_vertex[1]
m_adult_5.40.5.n2 <- m_adult_5.40.5$points_in_vertex[2]
m_adult_5.40.5.n3 <- m_adult_5.40.5$points_in_vertex[3]
m_adult_5.40.5.n4 <- m_adult_5.40.5$points_in_vertex[4]
m_adult_5.40.5.n5 <- m_adult_5.40.5$points_in_vertex[5]
# Step 2: pull the row indices out of each one-element list as a plain vector.
m_adult_5.40.5.n1.vec <- as.vector(m_adult_5.40.5.n1[[1]])
m_adult_5.40.5.n2.vec <- as.vector(m_adult_5.40.5.n2[[1]])
m_adult_5.40.5.n3.vec <- as.vector(m_adult_5.40.5.n3[[1]])
m_adult_5.40.5.n4.vec <- as.vector(m_adult_5.40.5.n4[[1]])
m_adult_5.40.5.n5.vec <- as.vector(m_adult_5.40.5.n5[[1]])
# Step 3: map the IDs of each Mapper vertex to the matching rows of the
# Adult One Hot DF4 dataset (adult.one_hot_df4).
tda.m_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_adult_5.40.5.n1.vec, ]
tda.m_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_adult_5.40.5.n2.vec, ]
tda.m_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_adult_5.40.5.n3.vec, ]
tda.m_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_adult_5.40.5.n4.vec, ]
tda.m_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_adult_5.40.5.n5.vec, ]
## *** Adult Mapper (KDE filter): 5 intervals, 40% overlap, 5 bins
# Same Mapper configuration as the PCA run, but the filter value of each
# observation is its kernel density estimate (filter.kde).
# NOTE(review): dist(adult.one_hot_df) is recomputed here; consider computing it
# once and reusing it if memory and run time matter.
m_kde_adult_5.40.5 <- mapper1D(
  distance_matrix = dist(adult.one_hot_df),
  filter_values = c(filter.kde),
  num_intervals = 5,
  percent_overlap = 40,
  num_bins_when_clustering = 5)
# graph_from_adjacency_matrix() / layout_nicely() replace the igraph functions
# graph.adjacency() / layout.auto(), deprecated in igraph 2.0.0.
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency,
                                                  mode = "undirected")
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5))

# Inspect the structure of the KDE-filter Mapper result.
# str() prints the structure itself and returns NULL invisibly, so it is called
# directly; wrapping it in head() only added a stray "NULL" to the output.
str(m_kde_adult_5.40.5$level_of_vertex)
## int [1:5] 1 2 3 4 5
str(m_kde_adult_5.40.5$vertices_in_level)
## List of 5
## $ : num 1
## $ : num 2
## $ : num 3
## $ : num 4
## $ : num 5
# points_in_vertex holds, per vertex, the row indices of its member observations.
str(m_kde_adult_5.40.5$points_in_vertex)
## List of 5
## $ : int [1:11838] 4 5 6 7 9 16 19 20 21 22 ...
## $ : int [1:11203] 1 2 6 9 13 20 24 25 26 29 ...
## $ : int [1:10351] 1 8 10 11 12 14 27 28 30 31 ...
## $ : int [1:8741] 3 10 11 12 14 15 27 30 32 34 ...
## $ : int [1:6628] 3 15 17 18 37 39 59 60 65 66 ...
# Coloured plot of the KDE-filter Mapper graph: vertex colour encodes the level
# of the vertex, vertex size encodes how many observations it contains.
my_resolution <- 100
my_palette <- colorRampPalette(c('red','green','lightblue'))
# Normalise the level index of each vertex by the largest level.
my_max <- max(m_kde_adult_5.40.5$level_of_vertex, na.rm = TRUE)
my_vector <- m_kde_adult_5.40.5$level_of_vertex / my_max
# Bin the normalised levels into `my_resolution` intervals and use the bin
# number to pick a colour from the palette.
my_colors <- my_palette(my_resolution)[as.numeric(cut(
  my_vector, breaks = my_resolution))]
# graph_from_adjacency_matrix() / layout_nicely() replace the igraph functions
# graph.adjacency() / layout.auto(), deprecated in igraph 2.0.0.
g_kde_adult_5.40.5 <- graph_from_adjacency_matrix(m_kde_adult_5.40.5$adjacency,
                                                  mode = "undirected")
# Number of observations per vertex.
vertex_size <- lengths(m_kde_adult_5.40.5$points_in_vertex)
# Log-scaled vertex size; the largest vertex is drawn at size 30.
plot(g_kde_adult_5.40.5, layout = layout_nicely(g_kde_adult_5.40.5),
     vertex.size = 30 * log(vertex_size) /
       max(log(vertex_size)),
     vertex.color = my_colors)

# Membership of each KDE-filter Mapper vertex.
# Step 1: keep each vertex's membership as a one-element list (single-bracket
# subsetting), one object per vertex.
m_kde_adult_5.40.5.n1 <- m_kde_adult_5.40.5$points_in_vertex[1]
m_kde_adult_5.40.5.n2 <- m_kde_adult_5.40.5$points_in_vertex[2]
m_kde_adult_5.40.5.n3 <- m_kde_adult_5.40.5$points_in_vertex[3]
m_kde_adult_5.40.5.n4 <- m_kde_adult_5.40.5$points_in_vertex[4]
m_kde_adult_5.40.5.n5 <- m_kde_adult_5.40.5$points_in_vertex[5]
# Step 2: pull the row indices out of each one-element list as a plain vector.
m_kde_adult_5.40.5.n1.vec <- as.vector(m_kde_adult_5.40.5.n1[[1]])
m_kde_adult_5.40.5.n2.vec <- as.vector(m_kde_adult_5.40.5.n2[[1]])
m_kde_adult_5.40.5.n3.vec <- as.vector(m_kde_adult_5.40.5.n3[[1]])
m_kde_adult_5.40.5.n4.vec <- as.vector(m_kde_adult_5.40.5.n4[[1]])
m_kde_adult_5.40.5.n5.vec <- as.vector(m_kde_adult_5.40.5.n5[[1]])
# Step 3: map the IDs of each Mapper vertex to the matching rows of the
# Adult One Hot DF4 dataset (adult.one_hot_df4).
tda.m_kde_adult_5.40.5.n1.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n1.vec, ]
tda.m_kde_adult_5.40.5.n2.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n2.vec, ]
tda.m_kde_adult_5.40.5.n3.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n3.vec, ]
tda.m_kde_adult_5.40.5.n4.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n4.vec, ]
tda.m_kde_adult_5.40.5.n5.vec <- adult.one_hot_df4[m_kde_adult_5.40.5.n5.vec, ]
library(caret)
# Rebuild the modelling tables used by the classifiers (same construction as in
# the Mapper preparation step).
adult_df1 <- adult[, 15]
adult.one_hot_df1 <- cbind(adult.one_hot_df, adult_df1)
adult.one_hot_df2 <- adult.one_hot_df1[, c(1, 11, 28, 64, 65, 66, 109)]
adult.one_hot_df3 <- adult.one_hot_df1[, c(1, 11, 28, 62, 63, 64, 65, 66)]
adult.one_hot_df4 <- adult.one_hot_df1[, -c(109, 110)]
# Draw a single 70% training partition on the response; the result is returned
# as a one-column matrix of row indices rather than a list.
# NOTE(review): no seed is set before this call, so the split differs between
# runs -- consider calling set.seed() first if reproducibility is required.
trainIndex <- createDataPartition(
  y = adult.one_hot_df4$adult_df1,
  times = 1,
  p = .7,
  list = FALSE)
# Show the first few selected row indices.
head(trainIndex)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 4
## [4,] 5
## [5,] 7
## [6,] 8
# Split the modelling table into training rows (trainIndex) and the remaining
# held-out test rows.
adult.one_hot_df4Train <- adult.one_hot_df4[ trainIndex,]
adult.one_hot_df4Test <- adult.one_hot_df4[-trainIndex,]
# Train control: k-fold cross-validation basis for all models (3 folds).
fitControl <- trainControl(method = "cv",
                           number = 3)
# Non-TDA-Assisted
# Candidate mtry values. randomForest clamps any mtry above the number of
# predictors back into the valid range (with an "invalid mtry" warning), so the
# original 50..1000 grid refitted the same clamped model many times. Cap the
# grid at the predictor count and drop the resulting duplicates.
# Every column except the response is a numeric predictor here.
n_predictors <- ncol(adult.one_hot_df4Train) - 1L
rfGrid <- expand.grid(mtry = unique(pmin((1:20) * 50, n_predictors)))
# Random Forest
# `importance = TRUE` must be lower-case: R argument names are case-sensitive,
# so the former `Importance = T` was passed through `...` and silently ignored,
# leaving permutation importance uncomputed.
adultRfFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train,
                    importance = TRUE,
                    method = 'rf',
                    trControl = fitControl,
                    tuneGrid = rfGrid, preProc = c('center','scale'),
                    metric = 'Accuracy')
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the fitted caret model: resampling setup, accuracy/kappa per mtry
# candidate, and the mtry value selected for the final model.
adultRfFit
## Random Forest
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8572372 0.5882934
## 100 0.8546924 0.5828444
## 150 0.8549557 0.5839639
## 200 0.8541660 0.5813642
## 250 0.8546925 0.5827775
## 300 0.8542976 0.5810137
## 350 0.8535957 0.5792854
## 400 0.8531569 0.5788817
## 450 0.8530691 0.5783394
## 500 0.8532884 0.5784096
## 550 0.8538589 0.5809612
## 600 0.8533324 0.5787809
## 650 0.8542099 0.5817356
## 700 0.8543415 0.5823679
## 750 0.8538588 0.5805136
## 800 0.8528498 0.5773478
## 850 0.8537712 0.5804901
## 900 0.8536833 0.5798539
## 950 0.8535079 0.5788895
## 1000 0.8543854 0.5818180
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected model.
adultRfFit$resample
## Accuracy Kappa Resample
## 1 0.8636304 0.6053166 Fold1
## 2 0.8575941 0.5924862 Fold3
## 3 0.8504870 0.5670776 Fold2
# Keep only the first column (Accuracy) of the per-fold table; single-bracket
# indexing keeps it as a one-column data frame.
ad_rf_fit_re<-adultRfFit$resample[1]
# List the components stored in the fitted model object.
summary(adultRfFit)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 22793 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 45586 matrix numeric
## oob.times 22793 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 22793 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the baseline model, limited to 25 features.
vip(adultRfFit, num_features = 25) + ggtitle("non-TDA-Assisted: RF")

# Score the held-out test rows with the model fitted on the training rows.
predictions <- predict(adultRfFit, newdata = adult.one_hot_df4Test)
# Compare predicted classes with the true test labels to assess
# model fit/performance on the test data.
rf_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1))
rf_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6929 891
## >50K 487 1461
##
## Accuracy : 0.8589
## 95% CI : (0.8519, 0.8658)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5901
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9343
## Specificity : 0.6212
## Pos Pred Value : 0.8861
## Neg Pred Value : 0.7500
## Prevalence : 0.7592
## Detection Rate : 0.7094
## Detection Prevalence : 0.8006
## Balanced Accuracy : 0.7778
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the baseline random forest.
rf_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.589271e-01 5.901123e-01 8.518658e-01 8.657737e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.976785e-132 1.862103e-27
# First element of the overall statistics = Accuracy.
rf_cf_ov_acc<-rf_cf$overall[1]
# Per-class statistics (computed for the 'positive' class reported above).
rf_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9343312 0.6211735 0.8860614
## Neg Pred Value Precision Recall
## 0.7500000 0.8860614 0.9343312
## F1 Prevalence Detection Rate
## 0.9095563 0.7592138 0.7093571
## Detection Prevalence Balanced Accuracy
## 0.8005733 0.7777523
# Elements 5 to 7 of the per-class statistics = Precision, Recall and F1.
rf_cf_pre_rec_f1<-rf_cf$byClass[5:7]
## With TDA PCA filter: 5 intervals, 40% overlap, 5 bins
## Node 1
# Random forest fitted on the rows that belong to vertex 1 of the PCA-filter
# Mapper graph (tda.m_adult_5.40.5.n1.vec), with the same CV control and mtry
# grid as the baseline model.
# NOTE(review): the node table is drawn from the full adult.one_hot_df4 table,
# not from the training partition -- confirm this is intended.
# `importance = TRUE` must be lower-case: R argument names are case-sensitive,
# so the former `Importance = T` was passed through `...` and silently ignored.
Adult_TDA_PC_5.40.5_n1_RfFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.40.5.n1.vec,
                                       importance = TRUE,
                                       method = 'rf',
                                       trControl = fitControl,
                                       tuneGrid = rfGrid, preProc = c('center','scale'),
                                       metric = 'Accuracy')
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated,
## V6.Widowed, V7.Other.service, V7.Priv.house.serv, V8.Other.relative,
## V8.Own.child, V8.Unmarried, V14.Cambodia, V14.Columbia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Jamaica,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago,
## V14.Vietnam
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.1st.4th, V4.5th.6th, V4.9th, V4.Preschool, V6.Married.spouse.absent,
## V6.Never.married, V6.Separated, V6.Widowed, V7.Priv.house.serv,
## V8.Other.relative, V8.Own.child, V14.Cambodia, V14.Dominican.Republic,
## V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hungary,
## V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru,
## V14.Portugal, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.12th, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child,
## V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua,
## V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed,
## V7.Priv.house.serv, V8.Other.relative, V8.Own.child, V14.Cambodia,
## V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands,
## V14.Honduras, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.,
## V14.Trinadad.Tobago
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
# Print the fitted n1 random-forest model object; the output below lists the
# cross-validated accuracy and kappa for every mtry value in the tuning grid.
Adult_TDA_PC_5.40.5_n1_RfFit0
## Random Forest
##
## 3373 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 2249, 2248, 2249
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9925884 0.07366337
## 100 0.9925884 0.13347301
## 150 0.9922921 0.13293883
## 200 0.9925884 0.13347301
## 250 0.9922921 0.13293883
## 300 0.9922918 0.12810235
## 350 0.9928849 0.13990230
## 400 0.9925884 0.13347301
## 450 0.9925884 0.13347301
## 500 0.9916990 0.07215578
## 550 0.9922921 0.13293883
## 600 0.9919953 0.12354874
## 650 0.9919955 0.12756816
## 700 0.9925884 0.13347301
## 750 0.9925884 0.13347301
## 800 0.9922918 0.12810235
## 850 0.9925884 0.13347301
## 900 0.9922918 0.12571495
## 950 0.9925884 0.13347301
## 1000 0.9922918 0.12810235
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 350.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_PC_5.40.5_n1_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9920000 0.0000000 Fold2
## 2 0.9937722 0.2209901 Fold1
## 3 0.9928826 0.1987168 Fold3
# Keep only the Accuracy column (first column of the resample table, still a
# one-column table) for the fold-wise comparison further down.
ad_tda_pc_5.40.5_n1_rf_fit0_re <-
  Adult_TDA_PC_5.40.5_n1_RfFit0[["resample"]]["Accuracy"]
# Structural summary of the fitted object
summary(Adult_TDA_PC_5.40.5_n1_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 3373 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 6746 matrix numeric
## oob.times 3373 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 3373 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot restricted to the 25 highest-ranked predictors.
# The plot title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_PC_5.40.5_n1_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n1_RfFit TDA-Assisted RF")

# Predict the outcome for the test data with the model fitted on the n1 training data
pred0 <- predict(Adult_TDA_PC_5.40.5_n1_RfFit0, newdata = adult.one_hot_df4Test)
# Confusion matrix of predictions against the observed test labels.
# `reference` is named explicitly so predictions and truth cannot be swapped by accident.
ad_tda_pc_5.40.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5 0
## >50K 7411 2352
##
## Accuracy : 0.2413
## 95% CI : (0.2328, 0.2499)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 3e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0006742
## Specificity : 1.0000000
## Pos Pred Value : 1.0000000
## Neg Pred Value : 0.2409096
## Prevalence : 0.7592138
## Detection Rate : 0.0005119
## Detection Prevalence : 0.0005119
## Balanced Accuracy : 0.5003371
##
## 'Positive' Class : <=50K
##
# Display the test-set confusion matrix once more (same object as printed just above)
ad_tda_pc_5.40.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 5 0
## >50K 7411 2352
##
## Accuracy : 0.2413
## 95% CI : (0.2328, 0.2499)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 3e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0006742
## Specificity : 1.0000000
## Pos Pred Value : 1.0000000
## Neg Pred Value : 0.2409096
## Prevalence : 0.7592138
## Detection Rate : 0.0005119
## Detection Prevalence : 0.0005119
## Balanced Accuracy : 0.5003371
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the confusion matrix
ad_tda_pc_5.40.5_n1_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2412981163 0.0003247983 0.2328399276 0.2499108150 0.7592137592
## AccuracyPValue McnemarPValue
## 1.0000000000 0.0000000000
# Store the overall accuracy (first entry of `overall`), selected by name
ad_tda_pc_5.40.5_n1_rf_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n1_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the confusion matrix
ad_tda_pc_5.40.5_n1_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.0006742179 1.0000000000 1.0000000000
## Neg Pred Value Precision Recall
## 0.2409095565 1.0000000000 0.0006742179
## F1 Prevalence Detection Rate
## 0.0013475273 0.7592137592 0.0005118755
## Detection Prevalence Balanced Accuracy
## 0.0005118755 0.5003371090
# Store precision, recall and F1 (entries 5 to 7 of `byClass`), selected by name
ad_tda_pc_5.40.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_rf_fit_re minus the n1 TDA-assisted fit
diff_tda_pca_5.40.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.40.5_n1_rf_fit0_re
diff_tda_pca_5.40.5_rf_n1_3_fold
## Accuracy
## 1 -0.1283696
## 2 -0.1361781
## 3 -0.1423956
## Bayesian tests on the 3-fold accuracy differences
# Bayesian sign test; -0.01 and 0.01 are the same bounds used for the rope plot
# further down (presumably the ROPE limits -- confirm against the helper's definition).
bst_tda_pca_5.40.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_rf.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Sign-test odds of "left" against "right": probLeft / probRight.
# The ratio evaluates to Inf whenever probRight is 0.
bst_tda_pca_5.40.5_rf.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the fold-wise differences, same bounds as the sign test
bsr_tda_pca_5.40.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_rf.n1_3_fold
## $winLeft
## [1] 0.9925667
##
## $winRope
## [1] 0.007433333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) looks like the correlation rho between
# folds; for k-fold cross-validation the usual choice is 1/k, which would be 1/3
# for the 3-fold resampling used here -- confirm against the definition of
# correlatedBayesianTtest before relying on these posterior probabilities.
bct_tda_pca_5.40.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_rf.n1_3_fold
## $left
## [1] 0.9993062
##
## $rope
## [1] 0.0001771861
##
## $right
## [1] 0.0005166198
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold), c(-0.01, 0.01))
)

# BayesFactor (currently disabled)
#bf_tda_pca_5.40.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold))
#bf_tda_pca_5.40.5_rf.n1_3_fold
# One-sample t-test of the fold-wise differences against a mean of 0
t.test(as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_rf_n1_3_fold)
## t = -33.43, df = 2, p-value = 0.0008936
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1531063 -0.1181892
## sample estimates:
## mean of x
## -0.1356478
### Test set diff
# Test-set accuracy gap: `rf_cf_ov_acc` minus the node-1 TDA-assisted RF
# accuracy. The result is a single value, so every test below receives exactly
# one observation.
diff_tda_pca_5.40.5_rf.n1_test <- rf_cf_ov_acc - ad_tda_pc_5.40.5_n1_rf_cf0_ov_acc
diff_tda_pca_5.40.5_rf.n1_test
## Accuracy
## 0.617629
## Bayesian Tests Test set diff
# Bayesian Sign Test (trailing arguments are presumably the ROPE bounds).
bst_tda_pca_5.40.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_rf.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
bst_tda_pca_5.40.5_rf.n1_test_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference.
bsr_tda_pca_5.40.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_rf.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1570667
##
## $winRight
## [1] 0.8429333
# Bayesian Correlated Test
# NOTE(review): with this one-value input the result is NA for left, rope and
# right (see output), presumably because no spread can be estimated from a
# single difference. Treat this step as uninformative.
bct_tda_pca_5.40.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_rf.n1_test)))
#BayesFactor
#bf_tda_pca_5.40.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n1_test)) #bf_tda_pca_5.40.5_rf.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n1_test))
## Node2
# Tune a caret random forest on the node-2 TDA data set, predicting
# `adult_df1` (coerced to a factor) from all other columns. Predictors are
# centred and scaled; candidate models are compared on Accuracy using the
# resampling scheme in `fitControl` and the mtry values in `rfGrid`.
#
# NOTE(review): `Importance` (capital I) does not match randomForest's
# lowercase `importance` argument, and R argument names are case-sensitive.
# The model summary further down reports importanceSD as NULL, which suggests
# the flag currently has no effect. Renaming it would change what
# varImp()/vip() report for this fit, so confirm the intent before switching.
#
# NOTE(review): the "invalid mtry: reset to within valid range" warnings logged
# below indicate that `rfGrid` contains mtry values larger than the number of
# predictors (108 for this fit, against grid values up to 1000).
Adult_TDA_PC_5.40.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  Importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Honduras,
## V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Laos,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
# Print the tuning summary of the node-2 TDA-assisted random forest.
Adult_TDA_PC_5.40.5_n2_RfFit0
## Random Forest
##
## 10276 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6851, 6851, 6850
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.7335542 0.4367726
## 100 0.7282992 0.4257392
## 150 0.7267423 0.4225249
## 200 0.7261581 0.4218180
## 250 0.7256720 0.4203823
## 300 0.7277149 0.4246847
## 350 0.7271316 0.4232567
## 400 0.7285910 0.4269845
## 450 0.7285909 0.4266089
## 500 0.7265476 0.4222618
## 550 0.7265474 0.4224339
## 600 0.7278130 0.4244236
## 650 0.7284939 0.4263608
## 700 0.7268396 0.4227543
## 750 0.7276179 0.4242865
## 800 0.7281044 0.4255446
## 850 0.7284933 0.4255964
## 900 0.7275207 0.4242161
## 950 0.7244067 0.4172622
## 1000 0.7281044 0.4257152
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold results for the selected model. Note the row order in the printout:
# Fold1, Fold3, Fold2.
Adult_TDA_PC_5.40.5_n2_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.7462774 0.4633532 Fold1
## 2 0.7302977 0.4283172 Fold3
## 3 0.7240876 0.4186475 Fold2
# Keep only the Accuracy column (column 1 above) as a one-column data frame.
ad_tda_pc_5.40.5_n2_rf_fit0_re <-
  Adult_TDA_PC_5.40.5_n2_RfFit0$resample["Accuracy"]
# List the components stored for the node-2 TDA-assisted random forest.
# importanceSD and localImportance are NULL in the listing below.
summary(Adult_TDA_PC_5.40.5_n2_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 10276 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 20552 matrix numeric
## oob.times 10276 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 10276 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot limited to 25 features. The title previously read
# "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_PC_5.40.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n2_RfFit TDA-Assisted RF")

# Score the test data (`adult.one_hot_df4Test`) with the node-2 TDA-assisted
# random forest.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels (`adult_df1`, coerced
# to a factor) to assess performance on the test data, then print the result.
ad_tda_pc_5.40.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1367 7
## >50K 6049 2345
##
## Accuracy : 0.38
## 95% CI : (0.3704, 0.3897)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0966
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1843
## Specificity : 0.9970
## Pos Pred Value : 0.9949
## Neg Pred Value : 0.2794
## Prevalence : 0.7592
## Detection Rate : 0.1399
## Detection Prevalence : 0.1407
## Balanced Accuracy : 0.5907
##
## 'Positive' Class : <=50K
##
# Print the node-2 confusion matrix a second time. The object has not been
# reassigned since the print just above, so the output below repeats it.
ad_tda_pc_5.40.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1367 7
## >50K 6049 2345
##
## Accuracy : 0.38
## 95% CI : (0.3704, 0.3897)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0966
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1843
## Specificity : 0.9970
## Pos Pred Value : 0.9949
## Neg Pred Value : 0.2794
## Prevalence : 0.7592
## Detection Rate : 0.1399
## Detection Prevalence : 0.1407
## Balanced Accuracy : 0.5907
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-2 test-set confusion matrix.
ad_tda_pc_5.40.5_n2_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.38001638 0.09661567 0.37037636 0.38972799 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Keep the accuracy (first entry of $overall, labelled "Accuracy" above).
ad_tda_pc_5.40.5_n2_rf_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n2_rf_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_pc_5.40.5_n2_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.1843312 0.9970238 0.9949054
## Neg Pred Value Precision Recall
## 0.2793662 0.9949054 0.1843312
## F1 Prevalence Detection Rate
## 0.3110353 0.7592138 0.1399468
## Detection Prevalence Balanced Accuracy
## 0.1406634 0.5906775
# Keep precision, recall and F1 (entries 5 to 7 of $byClass in the printout).
ad_tda_pc_5.40.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Difference of the two resample accuracy tables: `ad_rf_fit_re` minus the
# node-2 TDA-assisted fit. The subtraction pairs rows by position.
# NOTE(review): the node-2 resample table printed earlier lists its rows as
# Fold1, Fold3, Fold2 -- confirm `ad_rf_fit_re` uses the same row order.
diff_tda_pca_5.40.5_rf_n2_3_fold <- ad_rf_fit_re - ad_tda_pc_5.40.5_n2_rf_fit0_re
diff_tda_pca_5.40.5_rf_n2_3_fold
## Accuracy
## 1 0.1173530
## 2 0.1272964
## 3 0.1263994
## Bayesian Tests 3-fold diff
# Bayesian Sign Test. The two trailing arguments are presumably the ROPE
# bounds; they match the c(-0.01, 0.01) range given to rope() further down.
bst_tda_pca_5.40.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_rf.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
bst_tda_pca_5.40.5_rf.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the same differences and bounds.
bsr_tda_pca_5.40.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_rf.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.01003333
##
## $winRight
## [1] 0.9899667
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the correlation between
# folds; the node-2 fit was resampled with 3-fold cross-validation, so confirm
# that 0.1 (rather than 1/3) is the intended value.
bct_tda_pca_5.40.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_rf.n2_3_fold
## $left
## [1] 0.000375751
##
## $rope
## [1] 0.0001436159
##
## $right
## [1] 0.9994806
# Rope Plot
plot(
  rope(
    as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold),
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.40.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold))
#bf_tda_pca_5.40.5_rf.n2_3_fold
# One-sample t-test of the three fold differences against a mean of zero.
t.test(as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_rf_n2_3_fold)
## t = 38.949, df = 2, p-value = 0.0006585
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1100197 0.1373461
## sample estimates:
## mean of x
## 0.1236829
### Test set diff
# Test-set accuracy gap: `rf_cf_ov_acc` minus the node-2 TDA-assisted RF
# accuracy. The result is a single value, so every test below receives exactly
# one observation.
diff_tda_pca_5.40.5_rf.n2_test <- rf_cf_ov_acc - ad_tda_pc_5.40.5_n2_rf_cf0_ov_acc
diff_tda_pca_5.40.5_rf.n2_test
## Accuracy
## 0.4789107
## Bayesian Tests Test set diff
# Bayesian Sign Test (trailing arguments are presumably the ROPE bounds).
bst_tda_pca_5.40.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_rf.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
bst_tda_pca_5.40.5_rf.n2_test_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n2_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference.
bsr_tda_pca_5.40.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_rf.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1592667
##
## $winRight
## [1] 0.8407333
# Bayesian Correlated Test
# NOTE(review): with this one-value input the result is NA for left, rope and
# right (see output), presumably because no spread can be estimated from a
# single difference. Treat this step as uninformative.
bct_tda_pca_5.40.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_rf.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.40.5_rf.n2_test),c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.40.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n2_test)) #bf_tda_pca_5.40.5_rf.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n2_test))
##Node3
# Fit the node-3 TDA-assisted random forest through caret::train().
#   - formula: adult_df1 (coerced to a factor) against every other column of
#     tda.m_adult_5.40.5.n3.vec
#   - fitControl / rfGrid: resampling setup and mtry grid defined earlier in the file
#   - predictors are centered and scaled; the model is selected on Accuracy
# `importance = TRUE` is forwarded through `...` to randomForest(). The earlier
# spelling `Importance = T` did not match randomForest's lower-case `importance`
# argument, and the recorded summary() of the fit shows importanceSD as NULL.
# NOTE(review): the recorded warnings report "invalid mtry: reset to within
# valid range" and the fit has 108 predictors while rfGrid goes up to 1000, so
# the larger grid values are being reset - consider trimming rfGrid.
# NOTE(review): the recorded warnings also report zero-variance predictors
# during center/scale pre-processing.
Adult_TDA_PC_5.40.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V7.Priv.house.serv,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
# Print the fitted caret model: resampled Accuracy and Kappa for each mtry value
# and the mtry selected for the final model.
Adult_TDA_PC_5.40.5_n3_RfFit0
## Random Forest
##
## 11563 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7708, 7709, 7709
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8753780 0.5837723
## 100 0.8715727 0.5762723
## 150 0.8713133 0.5761533
## 200 0.8708808 0.5754117
## 250 0.8708812 0.5751869
## 300 0.8707082 0.5742771
## 350 0.8709674 0.5753250
## 400 0.8721782 0.5784881
## 450 0.8698431 0.5720834
## 500 0.8711402 0.5753669
## 550 0.8713133 0.5759419
## 600 0.8705349 0.5735405
## 650 0.8713133 0.5772845
## 700 0.8708808 0.5745360
## 750 0.8711404 0.5760373
## 800 0.8709675 0.5744506
## 850 0.8707080 0.5742496
## 900 0.8706215 0.5736133
## 950 0.8707944 0.5742088
## 1000 0.8706215 0.5743404
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa of the selected model
Adult_TDA_PC_5.40.5_n3_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8793774 0.5999564 Fold1
## 2 0.8692268 0.5626484 Fold3
## 3 0.8775298 0.5887122 Fold2
# Keep only the first column (Accuracy) of the resample table.
# NOTE(review): the rows above are ordered Fold1, Fold3, Fold2, and this object
# is later subtracted row-by-row from ad_rf_fit_re - confirm both are in the
# same fold order.
ad_tda_pc_5.40.5_n3_rf_fit0_re <- Adult_TDA_PC_5.40.5_n3_RfFit0[["resample"]][1]
# Structural summary of the final model object
summary(Adult_TDA_PC_5.40.5_n3_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 11563 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 23126 matrix numeric
## oob.times 11563 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 11563 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot (25 features) for the node-3 model.
# The plot title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_PC_5.40.5_n3_RfFit0, 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n3_RfFit TDA-Assisted RF")

# Score the test data with the node-3 model trained above
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test labels (adult_df1),
# used to assess performance on the test data
ad_tda_pc_5.40.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4125 1308
## >50K 3291 1044
##
## Accuracy : 0.5292
## 95% CI : (0.5192, 0.5391)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 1e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5562
## Specificity : 0.4439
## Pos Pred Value : 0.7592
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.4223
## Detection Prevalence : 0.5562
## Balanced Accuracy : 0.5001
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; the recorded output is
# identical to the first print.
ad_tda_pc_5.40.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4125 1308
## >50K 3291 1044
##
## Accuracy : 0.5292
## 95% CI : (0.5192, 0.5391)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 1e-04
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5562
## Specificity : 0.4439
## Pos Pred Value : 0.7592
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.4223
## Detection Prevalence : 0.5562
## Balanced Accuracy : 0.5001
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-3 test-set confusion matrix
ad_tda_pc_5.40.5_n3_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.291769e-01 8.333560e-05 5.192196e-01 5.391168e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 9.019515e-188
# The first element of `overall` is Accuracy
ad_tda_pc_5.40.5_n3_rf_cf0_ov_acc <- ad_tda_pc_5.40.5_n3_rf_cf0[["overall"]][1]
# Per-class statistics
ad_tda_pc_5.40.5_n3_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.5562298 0.4438776 0.7592490
## Neg Pred Value Precision Recall
## 0.2408304 0.7592490 0.5562298
## F1 Prevalence Detection Rate
## 0.6420733 0.7592138 0.4222973
## Detection Prevalence Balanced Accuracy
## 0.5562039 0.5000537
# Elements 5 to 7 of `byClass` are Precision, Recall and F1
ad_tda_pc_5.40.5_n3_rf_cf0_pre_rec_f1 <- ad_tda_pc_5.40.5_n3_rf_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-by-row difference between ad_rf_fit_re and the node-3 per-fold accuracies
diff_tda_pca_5.40.5_rf_n3_3_fold <- ad_rf_fit_re - ad_tda_pc_5.40.5_n3_rf_fit0_re
diff_tda_pca_5.40.5_rf_n3_3_fold
## Accuracy
## 1 -0.01574705
## 2 -0.01163267
## 3 -0.02704287
## Bayesian Tests 3-fold diff
# Bayesian Sign Test (ROPE bounds -0.01 and 0.01)
bst_tda_pca_5.40.5_rf.n3_3_fold <- diff_tda_pca_5.40.5_rf_n3_3_fold |>
  as.matrix() |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_pca_5.40.5_rf.n3_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight
# (the recorded result is Inf because probRight is 0)
bst_tda_pca_5.40.5_rf.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test (same ROPE bounds)
bsr_tda_pca_5.40.5_rf.n3_3_fold <- diff_tda_pca_5.40.5_rf_n3_3_fold |>
  as.matrix() |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_pca_5.40.5_rf.n3_3_fold
## $winLeft
## [1] 0.9109333
##
## $winRope
## [1] 0.08906667
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the node-3 3-fold differences.
# Arguments after the data: 0.1 (presumably the correlation argument - confirm
# against the function definition), then the ROPE bounds -0.01 and 0.01.
bct_tda_pca_5.40.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
# Print the node-3 result computed above. This line previously printed the
# node-2 object (bct_tda_pca_5.40.5_rf.n2_3_fold), so the recorded output that
# follows repeats the node-2 values and needs to be regenerated.
bct_tda_pca_5.40.5_rf.n3_3_fold
## $left
## [1] 0.000375751
##
## $rope
## [1] 0.0001436159
##
## $right
## [1] 0.9994806
# ROPE plot for the node-3 3-fold differences, using the ROPE range [-0.01, 0.01]
diff_tda_pca_5.40.5_rf_n3_3_fold |>
  as.matrix() |>
  rope(c(-0.01, 0.01)) |>
  plot()

# Bayes factor t-test (currently disabled)
# bf_tda_pca_5.40.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold))
# bf_tda_pca_5.40.5_rf.n3_3_fold
# One-sample t-test of the fold-wise differences (null hypothesis: mean equal to 0)
diff_tda_pca_5.40.5_rf_n3_3_fold |>
  as.matrix() |>
  t.test()
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_rf_n3_3_fold)
## t = -3.9379, df = 2, p-value = 0.05885
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.037962087 0.001680357
## sample estimates:
## mean of x
## -0.01814086
### Test set diff
# Difference between rf_cf_ov_acc (presumably the non-TDA RF test accuracy -
# confirm where it is defined) and the node-3 TDA-assisted RF test accuracy.
diff_tda_pca_5.40.5_rf.n3_test <- rf_cf_ov_acc - ad_tda_pc_5.40.5_n3_rf_cf0_ov_acc
diff_tda_pca_5.40.5_rf.n3_test
## Accuracy
## 0.3297502
## Bayesian Tests Test set diff
# Bayesian Sign Test (ROPE bounds -0.01 and 0.01)
# NOTE(review): the input here is a single difference value, not one per fold.
bst_tda_pca_5.40.5_rf.n3_test <- diff_tda_pca_5.40.5_rf.n3_test |>
  as.matrix() |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_pca_5.40.5_rf.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft divided by probRight
bst_tda_pca_5.40.5_rf.n3_test_odds.left <- with(
  bst_tda_pca_5.40.5_rf.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_rf.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test (same ROPE bounds)
bsr_tda_pca_5.40.5_rf.n3_test <- diff_tda_pca_5.40.5_rf.n3_test |>
  as.matrix() |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_pca_5.40.5_rf.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1555
##
## $winRight
## [1] 0.8445
# Bayesian Correlated Test (second argument 0.1, then the ROPE bounds)
# NOTE(review): the recorded result for this single-value input is NA for
# left, rope and right.
bct_tda_pca_5.40.5_rf.n3_test <- diff_tda_pca_5.40.5_rf.n3_test |>
  as.matrix() |>
  correlatedBayesianTtest(0.1, -0.01, 0.01)
bct_tda_pca_5.40.5_rf.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(as.matrix(diff_tda_pca_5.40.5_rf.n3_test),c(-0.01,0.01)))
#BayesFactor
#bf_tda_pca_5.40.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n3_test)) #bf_tda_pca_5.40.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n3_test))
##Node4
# Fit the node-4 TDA-assisted random forest with train(): method "rf",
# mtry tuned over rfGrid under the fitControl resampling scheme, predictors
# centered and scaled, model selected on accuracy.
# NOTE(review): `Importance` (capital I) is not randomForest's `importance`
# argument; the model summary recorded further down shows importanceSD as
# NULL, so this flag appears to have no effect. Confirm before relying on
# permutation importance (enabling it would change what vip() plots).
# NOTE(review): the warnings recorded after this call report "invalid mtry";
# rfGrid seems to hold mtry values above the number of predictors -- verify.
Adult_TDA_PC_5.40.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  Importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
# Print the fitted node-4 model: resampled Accuracy/Kappa per mtry value and
# the mtry that was selected.
Adult_TDA_PC_5.40.5_n4_RfFit0
## Random Forest
##
## 14818 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9878, 9879, 9879
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9644351 0.3632696
## 100 0.9630854 0.3515652
## 150 0.9631529 0.3550414
## 200 0.9632204 0.3579904
## 250 0.9631529 0.3549535
## 300 0.9634228 0.3568652
## 350 0.9632204 0.3567692
## 400 0.9630854 0.3545482
## 450 0.9634228 0.3598996
## 500 0.9632878 0.3561166
## 550 0.9628829 0.3543434
## 600 0.9631529 0.3536049
## 650 0.9631529 0.3549535
## 700 0.9629504 0.3561752
## 750 0.9633554 0.3576292
## 800 0.9632878 0.3599696
## 850 0.9633553 0.3565252
## 900 0.9631529 0.3551875
## 950 0.9631529 0.3550018
## 1000 0.9631529 0.3549273
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy and Kappa of the selected node-4 model.
Adult_TDA_PC_5.40.5_n4_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.9655870 0.3879696 Fold1
## 2 0.9629480 0.3162524 Fold3
## 3 0.9647702 0.3855867 Fold2
# Keep only the Accuracy column (the result is still a one-column data frame).
# NOTE(review): rows are listed as Fold1, Fold3, Fold2, and the later
# subtraction from ad_rf_fit_re is positional -- confirm both share this order.
ad_tda_pc_5.40.5_n4_rf_fit0_re <-
  Adult_TDA_PC_5.40.5_n4_RfFit0$resample["Accuracy"]
# Structural summary of the fitted object's components.
summary(
  Adult_TDA_PC_5.40.5_n4_RfFit0
)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 14818 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 29636 matrix numeric
## oob.times 14818 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 14818 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-4 model, limited to 25 features.
# The plot title previously read "TDA-Assited"; the spelling is corrected.
vip(Adult_TDA_PC_5.40.5_n4_RfFit0, 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n4_RfFit TDA-Assisted RF")

# Score the held-out test data with the node-4 model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test-set labels.
ad_tda_pc_5.40.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7412 1780
## >50K 4 572
##
## Accuracy : 0.8174
## 95% CI : (0.8096, 0.825)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3269
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9995
## Specificity : 0.2432
## Pos Pred Value : 0.8064
## Neg Pred Value : 0.9931
## Prevalence : 0.7592
## Detection Rate : 0.7588
## Detection Prevalence : 0.9410
## Balanced Accuracy : 0.6213
##
## 'Positive' Class : <=50K
##
# Prints the node-4 confusion matrix a second time; the object has not been
# modified since the previous print, so the output is a repeat.
ad_tda_pc_5.40.5_n4_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7412 1780
## >50K 4 572
##
## Accuracy : 0.8174
## 95% CI : (0.8096, 0.825)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3269
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9995
## Specificity : 0.2432
## Pos Pred Value : 0.8064
## Neg Pred Value : 0.9931
## Prevalence : 0.7592
## Detection Rate : 0.7588
## Detection Prevalence : 0.9410
## Balanced Accuracy : 0.6213
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-4 confusion matrix.
ad_tda_pc_5.40.5_n4_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.173628e-01 3.269485e-01 8.095553e-01 8.249806e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 6.438822e-44 0.000000e+00
# Test-set accuracy, kept as a named length-one vector (first entry above).
ad_tda_pc_5.40.5_n4_rf_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_rf_cf0$overall["Accuracy"]
# Per-class statistics.
ad_tda_pc_5.40.5_n4_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9994606 0.2431973 0.8063534
## Neg Pred Value Precision Recall
## 0.9930556 0.8063534 0.9994606
## F1 Prevalence Detection Rate
## 0.8925819 0.7592138 0.7588043
## Detection Prevalence Balanced Accuracy
## 0.9410319 0.6213290
# Precision, Recall and F1 (entries 5 to 7 of the listing above).
ad_tda_pc_5.40.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian comparison: non-TDA-assisted RF vs. TDA-assisted RF
### 3-fold diff
# Per-fold accuracy of the baseline RF minus that of the node-4 TDA-assisted
# RF; a negative entry means the TDA-assisted fold accuracy was higher.
diff_tda_pca_5.40.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.40.5_n4_rf_fit0_re
diff_tda_pca_5.40.5_rf_n4_3_fold
## Accuracy
## 1 -0.1019567
## 2 -0.1053539
## 3 -0.1142832
# ---- Bayesian tests on the node-4 3-fold differences ----
# Sign test; -0.01 and 0.01 are presumably the ROPE bounds (confirm against
# the BayesianSignTest definition).
bst_tda_pca_5.40.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold), -0.01, 0.01
)
bst_tda_pca_5.40.5_rf.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Ratio of probLeft to probRight; Inf below because probRight is 0.
bst_tda_pca_5.40.5_rf.n4_3_fold_odds.left <-
  bst_tda_pca_5.40.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_rf.n4_3_fold[["probRight"]]
bst_tda_pca_5.40.5_rf.n4_3_fold_odds.left
## [1] Inf
# Signed-rank test with the same two bounds.
bsr_tda_pca_5.40.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.40.5_rf.n4_3_fold
## $winLeft
## [1] 0.9906
##
## $winRope
## [1] 0.0094
##
## $winRight
## [1] 0
# Correlated t-test (0.1, then the two bounds).
bct_tda_pca_5.40.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_rf.n4_3_fold
## $left
## [1] 0.9990492
##
## $rope
## [1] 0.0002962323
##
## $right
## [1] 0.0006545455
# ROPE plot for the node-4 3-fold differences, using bounds -0.01 and 0.01.
plot(
  rope(
    as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold),
    c(-0.01, 0.01)
  )
)

# Bayes factor t-test (kept disabled)
#bf_tda_pca_5.40.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold))
#bf_tda_pca_5.40.5_rf.n4_3_fold
# One-sample t-test on the same node-4 3-fold differences.
t.test(
  as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_rf_n4_3_fold)
## t = -29.162, df = 2, p-value = 0.001174
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.12301398 -0.09138185
## sample estimates:
## mean of x
## -0.1071979
# ---- Node 4: test-set accuracy difference ----
# Baseline RF test accuracy minus the node-4 TDA-assisted RF test accuracy.
# This is a single named value, not a per-fold vector.
diff_tda_pca_5.40.5_rf.n4_test <-
  rf_cf_ov_acc - ad_tda_pc_5.40.5_n4_rf_cf0_ov_acc
diff_tda_pca_5.40.5_rf.n4_test
## Accuracy
## 0.04156429
# ---- Bayesian tests on the test-set difference ----
# Sign test; -0.01 and 0.01 are presumably the ROPE bounds (confirm against
# the BayesianSignTest definition).
bst_tda_pca_5.40.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf.n4_test), -0.01, 0.01
)
bst_tda_pca_5.40.5_rf.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's probLeft to its probRight.
bst_tda_pca_5.40.5_rf.n4_test_odds.left <-
  bst_tda_pca_5.40.5_rf.n4_test[["probLeft"]] /
  bst_tda_pca_5.40.5_rf.n4_test[["probRight"]]
bst_tda_pca_5.40.5_rf.n4_test_odds.left
## [1] 0
# Signed-rank test with the same two bounds.
bsr_tda_pca_5.40.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf.n4_test), -0.01, 0.01
)
bsr_tda_pca_5.40.5_rf.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1594667
##
## $winRight
## [1] 0.8405333
# Correlated t-test (0.1, then the two bounds).
# NOTE(review): the result recorded after this call is NA for left, rope and
# right -- presumably because a single difference has no spread; confirm.
bct_tda_pca_5.40.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf.n4_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_rf.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_rf.n4_test))
#BayesFactor
#bf_tda_pca_5.40.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n4_test)) #bf_tda_pca_5.40.5_rf.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n4_test))
## Node 5
# Tune a random forest on the node-5 TDA/PCA subset with caret.
#   - outcome:     as.factor(adult_df1), all remaining columns as predictors
#   - resampling:  as configured in `fitControl`
#   - candidates:  mtry values taken from `rfGrid`
#   - predictors are centred and scaled before fitting
#   - model selection metric: accuracy
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# Argument names are case-sensitive: the previous spelling `Importance` was
# absorbed silently by `...`, so no permutation importance was computed (the
# recorded model summary shows `importanceSD` as NULL).
# NOTE(review): the recorded "invalid mtry: reset to within valid range"
# warnings suggest `rfGrid` holds mtry values larger than the number of
# predictors; consider trimming the grid where it is defined.
Adult_TDA_PC_5.40.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy",
  importance = TRUE
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
# Print the caret fit for node 5: sample/predictor counts, pre-processing,
# resampled accuracy and kappa per candidate mtry, and the selected mtry.
Adult_TDA_PC_5.40.5_n5_RfFit0
## Random Forest
##
## 12081 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8054, 8054, 8054
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.9996689 0.0000000000
## 100 0.9995034 -0.0001655903
## 150 0.9994206 -0.0001931979
## 200 0.9994206 -0.0001931979
## 250 0.9994206 -0.0001931979
## 300 0.9994206 -0.0001931979
## 350 0.9994206 -0.0001931979
## 400 0.9995034 -0.0001655903
## 450 0.9995034 -0.0001655903
## 500 0.9993378 -0.0002070033
## 550 0.9995034 -0.0001655903
## 600 0.9995034 -0.0001655903
## 650 0.9994206 -0.0001931979
## 700 0.9994206 -0.0001931979
## 750 0.9994206 -0.0001931979
## 800 0.9994206 -0.0001931979
## 850 0.9994206 -0.0001931979
## 900 0.9994206 -0.0001931979
## 950 0.9995034 -0.0001655903
## 1000 0.9994206 -0.0001931979
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the node-5 fit.
Adult_TDA_PC_5.40.5_n5_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9997517 0 Fold1
## 2 0.9995034 0 Fold3
## 3 0.9997517 0 Fold2
# Keep the first column (Accuracy) as a one-column data frame for the paired
# fold-wise comparison further down.
# NOTE(review): rows appear as Fold1, Fold3, Fold2, and later subtraction
# pairs rows by position -- confirm the other fit's rows are in the same order.
ad_tda_pc_5.40.5_n5_rf_fit0_re <-
  Adult_TDA_PC_5.40.5_n5_RfFit0[["resample"]][1]
# Component listing of the fitted model.
summary(Adult_TDA_PC_5.40.5_n5_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 12081 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 24162 matrix numeric
## oob.times 12081 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 12081 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-5 fit, limited to 25 features.
# NOTE(review): the title text reads "TDA-Assited" (sic); left unchanged here.
vip(
  Adult_TDA_PC_5.40.5_n5_RfFit0,
  num_features = 25
) +
  ggtitle("Adult_TDA_PCA_5.40.5_n5_RfFit TDA-Assited RF")

# Score the held-out data with the node-5 model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the observed test labels.
ad_tda_pc_5.40.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object has not
# been modified since the previous print, so the output is a repeat.
ad_tda_pc_5.40.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-5 confusion matrix.
ad_tda_pc_5.40.5_n5_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the overall test accuracy (first entry of `overall`).
ad_tda_pc_5.40.5_n5_rf_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics.
ad_tda_pc_5.40.5_n5_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1 (entries 5 to 7 of `byClass`).
ad_tda_pc_5.40.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted RF vs. TDA-assisted RF
### 3-fold difference (node 5)
# Fold-wise accuracy gap: `ad_rf_fit_re` minus the node-5 TDA-assisted
# resample accuracies, subtracted row by row.
diff_tda_pca_5.40.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_pc_5.40.5_n5_rf_fit0_re
diff_tda_pca_5.40.5_rf_n5_3_fold
## Accuracy
## 1 -0.1361213
## 2 -0.1419092
## 3 -0.1492647
## Bayesian tests on the node-5 3-fold differences
# Bayesian sign test with ROPE bounds -0.01 and 0.01; the printed result has
# the entries `probLeft`, `probRope` and `probRight`.
bst_tda_pca_5.40.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold),
  -0.01, 0.01
)
bst_tda_pca_5.40.5_rf.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight). The ratio is Inf when probRight is 0, as in
# the recorded run.
bst_tda_pca_5.40.5_rf.n5_3_fold_odds.left <-
  bst_tda_pca_5.40.5_rf.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_rf.n5_3_fold[["probRight"]]
bst_tda_pca_5.40.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-5 3-fold differences with ROPE bounds
# -0.01 and 0.01; the printed result has `winLeft`, `winRope` and `winRight`.
bsr_tda_pca_5.40.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold),
  -0.01, 0.01
)
bsr_tda_pca_5.40.5_rf.n5_3_fold
## $winLeft
## [1] 0.9907333
##
## $winRope
## [1] 0.009266667
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-5 3-fold differences.
# Positional arguments after the data: 0.1 (presumably the correlation
# parameter -- confirm against the function definition), then the ROPE bounds.
bct_tda_pca_5.40.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_rf.n5_3_fold
## $left
## [1] 0.9994511
##
## $rope
## [1] 0.000134423
##
## $right
## [1] 0.0004144829
# ROPE plot for the node-5 3-fold accuracy differences, using the region of
# practical equivalence [-0.01, 0.01]. The data frame is passed as is.
plot(
  rope(
    diff_tda_pca_5.40.5_rf_n5_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.40.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold))
#bf_tda_pca_5.40.5_rf.n5_3_fold
# One-sample t-test of the node-5 3-fold accuracy differences against a true
# mean of zero.
t.test(
  as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_rf_n5_3_fold)
## t = -37.451, df = 2, p-value = 0.0007122
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1587954 -0.1260681
## sample estimates:
## mean of x
## -0.1424318
### Test-set difference (node 5)
# Single accuracy gap on the hold-out set: `rf_cf_ov_acc` (presumably the
# non-TDA random forest -- verify where it is defined) minus the node-5
# TDA-assisted model's overall accuracy.
diff_tda_pca_5.40.5_rf.n5_test <-
  rf_cf_ov_acc - ad_tda_pc_5.40.5_n5_rf_cf0_ov_acc
diff_tda_pca_5.40.5_rf.n5_test
## Accuracy
## 0.09971335
## Bayesian tests on the node-5 test-set difference
# Bayesian sign test with ROPE bounds -0.01 and 0.01; the printed result has
# the entries `probLeft`, `probRope` and `probRight`.
bst_tda_pca_5.40.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_rf.n5_test),
  -0.01, 0.01
)
bst_tda_pca_5.40.5_rf.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft divided by probRight).
bst_tda_pca_5.40.5_rf.n5_test_odds.left <-
  bst_tda_pca_5.40.5_rf.n5_test[["probLeft"]] /
  bst_tda_pca_5.40.5_rf.n5_test[["probRight"]]
bst_tda_pca_5.40.5_rf.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the node-5 test-set difference with ROPE bounds
# -0.01 and 0.01; the printed result has `winLeft`, `winRope` and `winRight`.
bsr_tda_pca_5.40.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_rf.n5_test),
  -0.01, 0.01
)
bsr_tda_pca_5.40.5_rf.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1600667
##
## $winRight
## [1] 0.8399333
# Correlated Bayesian t-test on the node-5 test-set difference
# (second argument 0.1, then ROPE bounds -0.01 and 0.01).
# NOTE(review): the input here is a single difference, and the recorded run
# printed NA for `left`, `rope` and `right`.
bct_tda_pca_5.40.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_rf.n5_test),
  0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_rf.n5_test))
#BayesFactor
#bf_tda_pca_5.40.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_rf.n5_test)) #bf_tda_pca_5.40.5_rf.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_rf.n5_test))
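##With TDA KDE filter 5 intervals, 50% overlap, 5 bins (NOTE(review): the object names in this section use the tag 5.40.5 -- confirm whether the overlap is 40% or 50%)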
##Node1
# Random forest (via caret::train) on the TDA-KDE 5.40.5 node-1 training
# subset. Tuning uses rfGrid and the resampling scheme in fitControl (both
# defined elsewhere); predictors are centered and scaled; the best model is
# chosen by Accuracy.
#
# Fix: the variable-importance switch was written `Importance = T`.
# randomForest's argument is lowercase `importance`, so the misspelled name
# was silently ignored (the fitted model's summary reports importanceSD as
# NULL). It is now `importance = TRUE`, which makes randomForest assess
# predictor importance during fitting. `preProc` is also spelled out as
# `preProcess` so it no longer relies on partial argument matching.
#
# NOTE(review): the tuning output lists mtry values from 50 to 1000 while
# this data has 108 predictors; that mismatch is what produces the repeated
# "invalid mtry: reset to within valid range" warnings. Consider capping
# rfGrid at the number of predictors.
Adult_TDA_KDE_5.40.5_n1_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
# Auto-print the fitted caret model: sample/predictor counts, pre-processing,
# cross-validated Accuracy and Kappa per mtry value, and the selected mtry.
Adult_TDA_KDE_5.40.5_n1_RfFit0
## Random Forest
##
## 11838 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7892, 7891, 7893
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8636600 0.6334583
## 100 0.8618857 0.6292076
## 150 0.8610410 0.6278349
## 200 0.8604498 0.6259625
## 250 0.8612101 0.6274137
## 300 0.8613790 0.6283440
## 350 0.8621391 0.6304952
## 400 0.8616323 0.6293076
## 450 0.8619699 0.6296452
## 500 0.8617169 0.6289204
## 550 0.8628996 0.6322507
## 600 0.8618859 0.6295355
## 650 0.8610410 0.6271913
## 700 0.8611255 0.6277824
## 750 0.8611257 0.6278591
## 800 0.8626458 0.6316204
## 850 0.8602808 0.6251932
## 900 0.8606187 0.6262678
## 950 0.8612944 0.6278230
## 1000 0.8606189 0.6254455
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Show the per-fold Accuracy and Kappa of the selected model. The rows are
# not necessarily in fold-number order (see the Resample column).
Adult_TDA_KDE_5.40.5_n1_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8621389 0.6302650 Fold1
## 2 0.8676806 0.6442454 Fold3
## 3 0.8611604 0.6258646 Fold2
# Keep the per-fold Accuracy column (the first column of the resample table)
# as a one-column data frame for the later fold-wise comparison, then
# summarise the fitted model object.
ad_tda_kde_5.40.5_n1_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n1_RfFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n1_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 11838 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 23676 matrix numeric
## oob.times 11838 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 11838 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-1 TDA-KDE random forest; 25 is passed
# as vip()'s second positional argument.
# Fix: corrected the misspelling "Assited" in the plot title.
vip(Adult_TDA_KDE_5.40.5_n1_RfFit0, 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n1_RfFit TDA-Assisted RF")

# Score the test data frame adult.one_hot_df4Test with the node-1 TDA-KDE
# random forest.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test labels (adult_df1) and
# print the resulting confusion matrix with its statistics.
ad_tda_kde_5.40.5_n1_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7164 653
## >50K 252 1699
##
## Accuracy : 0.9074
## 95% CI : (0.9014, 0.913)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7309
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9660
## Specificity : 0.7224
## Pos Pred Value : 0.9165
## Neg Pred Value : 0.8708
## Prevalence : 0.7592
## Detection Rate : 0.7334
## Detection Prevalence : 0.8003
## Balanced Accuracy : 0.8442
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; the output is
# identical to the print immediately after it was created.
ad_tda_kde_5.40.5_n1_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7164 653
## >50K 252 1699
##
## Accuracy : 0.9074
## 95% CI : (0.9014, 0.913)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7309
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9660
## Specificity : 0.7224
## Pos Pred Value : 0.9165
## Neg Pred Value : 0.8708
## Prevalence : 0.7592
## Detection Rate : 0.7334
## Detection Prevalence : 0.8003
## Balanced Accuracy : 0.8442
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix as a named numeric vector
# (Accuracy, Kappa, accuracy bounds, null accuracy and test p-values).
ad_tda_kde_5.40.5_n1_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.073505e-01 7.309312e-01 9.014274e-01 9.130295e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.000598e-313 2.427221e-40
# Store the test-set Accuracy (the first element of $overall) with its name
# kept, then print the per-class statistics.
ad_tda_kde_5.40.5_n1_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_rf_cf0$overall["Accuracy"]
ad_tda_kde_5.40.5_n1_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9660194 0.7223639 0.9164641
## Neg Pred Value Precision Recall
## 0.8708355 0.9164641 0.9660194
## F1 Prevalence Detection Rate
## 0.9405895 0.7592138 0.7334152
## Detection Prevalence Balanced Accuracy
## 0.8002662 0.8441917
# Keep Precision, Recall and F1, which are elements 5 to 7 of $byClass.
ad_tda_kde_5.40.5_n1_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_rf_fit_re minus the TDA-KDE node-1 RF.
# Negative entries mean the TDA-KDE fit scored higher on that row.
# NOTE(review): rows are paired by position, and the TDA-KDE resample table
# lists its folds as Fold1, Fold3, Fold2 -- confirm that ad_rf_fit_re
# (defined elsewhere) uses the same row order.
diff_tda_kde_5.40.5_rf_n1_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n1_rf_fit0_re
diff_tda_kde_5.40.5_rf_n1_3_fold
## Accuracy
## 1 0.001491506
## 2 -0.010086505
## 3 -0.010673405
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Bayesian sign test on the three fold-wise accuracy differences, called with
# the bounds -0.01 and 0.01.
bst_tda_kde_5.40.5_rf.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold),
  -0.01, 0.01
)
bst_tda_kde_5.40.5_rf.n1_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# The ratio is Inf when probRight is 0, as in the output printed below.
bst_tda_kde_5.40.5_rf.n1_3_fold_odds.left <-
  bst_tda_kde_5.40.5_rf.n1_3_fold$probLeft /
  bst_tda_kde_5.40.5_rf.n1_3_fold$probRight
bst_tda_kde_5.40.5_rf.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Bayesian signed-rank test on the three fold-wise accuracy differences,
# called with the bounds -0.01 and 0.01.
bsr_tda_kde_5.40.5_rf.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold),
  -0.01, 0.01
)
bsr_tda_kde_5.40.5_rf.n1_3_fold
## $winLeft
## [1] 0.3269333
##
## $winRope
## [1] 0.6730667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the three fold-wise accuracy differences.
# The positional arguments are 0.1, -0.01 and 0.01 -- presumably the fold
# correlation and the lower/upper ROPE bounds; confirm against the definition
# of correlatedBayesianTtest elsewhere in this file.
bct_tda_kde_5.40.5_rf.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_rf.n1_3_fold
## $left
## [1] 0.2580109
##
## $rope
## [1] 0.7072094
##
## $right
## [1] 0.03477973
# Rope Plot
# Plot the share of the fold-wise accuracy differences that falls inside the
# [-0.01, 0.01] region of practical equivalence.
plot(
  rope(
    diff_tda_kde_5.40.5_rf_n1_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.40.5_rf.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold))
#bf_tda_kde_5.40.5_rf.n1_3_fold
#t_test
# Classical one-sample t-test of the fold-wise accuracy differences against a
# mean of zero, as a frequentist counterpart to the Bayesian tests.
t.test(
  as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_rf_n1_3_fold)
## t = -1.6216, df = 2, p-value = 0.2463
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.02346466 0.01061905
## sample estimates:
## mean of x
## -0.006422801
### Test set diff
# Test-set accuracy gap: rf_cf_ov_acc minus the TDA-KDE node-1 RF accuracy.
# A negative value means the TDA-KDE model scored higher on the test set.
diff_tda_kde_5.40.5_rf.n1_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n1_rf_cf0_ov_acc
diff_tda_kde_5.40.5_rf.n1_test
## Accuracy
## -0.04842342
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Bayesian sign test on the test-set accuracy difference, called with the
# bounds -0.01 and 0.01.
# NOTE(review): the difference printed above is a single value, so this test
# sees one observation only.
bst_tda_kde_5.40.5_rf.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n1_test),
  -0.01, 0.01
)
bst_tda_kde_5.40.5_rf.n1_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# The ratio is Inf when probRight is 0, as in the output printed below.
bst_tda_kde_5.40.5_rf.n1_test_odds.left <-
  bst_tda_kde_5.40.5_rf.n1_test$probLeft /
  bst_tda_kde_5.40.5_rf.n1_test$probRight
bst_tda_kde_5.40.5_rf.n1_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Bayesian signed-rank test on the test-set accuracy difference, called with
# the bounds -0.01 and 0.01.
bsr_tda_kde_5.40.5_rf.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n1_test),
  -0.01, 0.01
)
bsr_tda_kde_5.40.5_rf.n1_test
## $winLeft
## [1] 0.8396
##
## $winRope
## [1] 0.1604
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the test-set accuracy difference, with the
# same positional arguments (0.1, -0.01, 0.01) as the 3-fold version.
# NOTE(review): the input is a single value and the printed result is NA for
# left, rope and right -- presumably because a spread cannot be estimated
# from one observation; confirm against correlatedBayesianTtest's definition.
bct_tda_kde_5.40.5_rf.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n1_test),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_rf.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n1_test))
#BayesFactor
#bf_tda_kde_5.40.5_rf.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n1_test))
#bf_tda_kde_5.40.5_rf.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n1_test))
## Node2
# Tune a random forest with caret on the node-2 TDA/KDE data set
# (tda.m_kde_adult_5.40.5.n2.vec), predicting adult_df1 from every other
# column. Resampling comes from fitControl and the mtry candidates from rfGrid,
# both defined elsewhere in this file; predictors are centered and scaled.
#
# `importance = TRUE` is forwarded to randomForest() so that its importance
# assessment is computed. The earlier spelling `Importance = T` matched no
# randomForest() argument (argument names are case-sensitive) and was silently
# dropped -- the recorded summary shows importanceSD as NULL. Transcript output
# captured before this change reflects the old call.
#
# NOTE(review): the recorded run reports 108 predictors while the tuning table
# goes up to mtry = 1000, which is what triggers the "invalid mtry" warnings;
# consider capping rfGrid at the predictor count where it is defined.
Adult_TDA_KDE_5.40.5_n2_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n2.vec,
  method = "rf",
  metric = "Accuracy",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  importance = TRUE
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
# Print the caret tuning summary (accuracy and kappa per mtry) for the node-2 fit.
Adult_TDA_KDE_5.40.5_n2_RfFit0
## Random Forest
##
## 11203 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7469, 7469, 7468
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8457554 0.6047366
## 100 0.8437920 0.6001984
## 150 0.8432564 0.5986506
## 200 0.8438811 0.6011784
## 250 0.8432563 0.5995230
## 300 0.8422745 0.5964219
## 350 0.8437919 0.6008044
## 400 0.8435241 0.6001002
## 450 0.8419176 0.5964063
## 500 0.8420959 0.5963587
## 550 0.8421852 0.5972741
## 600 0.8425421 0.5972713
## 650 0.8427208 0.5981473
## 700 0.8430777 0.5992367
## 750 0.8431669 0.5988447
## 800 0.8441490 0.6009856
## 850 0.8426316 0.5982195
## 900 0.8426314 0.5982829
## 950 0.8433456 0.6005848
## 1000 0.8423636 0.5973269
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected node-2 model.
Adult_TDA_KDE_5.40.5_n2_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8393144 0.5891400 Fold1
## 2 0.8476573 0.6098193 Fold3
## 3 0.8502946 0.6152507 Fold2
# Keep the Accuracy column (first column) as a one-column data frame.
# NOTE(review): the rows are not in fold order (Fold1, Fold3, Fold2 in the
# recorded run) and the later subtraction pairs rows by position -- confirm
# ad_rf_fit_re uses a compatible ordering.
ad_tda_KDE_5.40.5_n2_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n2_RfFit0[["resample"]]["Accuracy"]
# Structural summary of the fitted object.
summary(Adult_TDA_KDE_5.40.5_n2_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 11203 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 22406 matrix numeric
## oob.times 11203 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 11203 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-2 fit, limited to 25 features.
# The title previously read "TDA-Assited"; the spelling is corrected here.
vip(Adult_TDA_KDE_5.40.5_n2_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n2_RfFit TDA-Assisted RF")

# Predict outcome using Adult_TDA_KDE_5.40.5_n2_RfFit0 from training data based on testing data
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Create confusion matrix to assess model fit/performance on test data.
# Predictions go in `data`, the observed labels in `reference`.
ad_tda_kde_5.40.5_n2_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7129 623
## >50K 287 1729
##
## Accuracy : 0.9068
## 95% CI : (0.9009, 0.9125)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.7321
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9613
## Specificity : 0.7351
## Pos Pred Value : 0.9196
## Neg Pred Value : 0.8576
## Prevalence : 0.7592
## Detection Rate : 0.7298
## Detection Prevalence : 0.7936
## Balanced Accuracy : 0.8482
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-2 confusion matrix (accuracy, kappa,
# accuracy bounds and p-values). The confusion matrix itself is printed right
# after it is built, so the identical second print that stood here was removed.
ad_tda_kde_5.40.5_n2_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.068387e-01 7.321284e-01 9.009014e-01 9.125320e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.139479e-310 1.184291e-28
# Keep the overall test accuracy (the first entry of $overall) for the
# test-set comparison further down.
ad_tda_kde_5.40.5_n2_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_rf_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.40.5_n2_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9612999 0.7351190 0.9196336
## Neg Pred Value Precision Recall
## 0.8576389 0.9196336 0.9612999
## F1 Prevalence Detection Rate
## 0.9400053 0.7592138 0.7298321
## Detection Prevalence Balanced Accuracy
## 0.7936118 0.8482095
# Precision, recall and F1 -- entries 5 to 7 of $byClass.
ad_tda_kde_5.40.5_n2_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-by-row accuracy difference: ad_rf_fit_re (defined earlier in the file,
# presumably the non-TDA RF per-fold accuracies) minus the node-2 per-fold
# accuracies.
diff_tda_kde_5.40.5_rf_n2_3_fold <-
  ad_rf_fit_re - ad_tda_KDE_5.40.5_n2_rf_fit0_re
diff_tda_kde_5.40.5_rf_n2_3_fold
## Accuracy
## 1 0.024315972
## 2 0.009936808
## 3 0.000192380
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Run on the three per-fold differences; -0.01 and 0.01 are passed
# positionally and match the ROPE range used for rope() in this section.
bst_tda_kde_5.40.5_rf.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability.
bst_tda_kde_5.40.5_rf.n2_3_fold_odds.left <-
  bst_tda_kde_5.40.5_rf.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_rf.n2_3_fold[["probRight"]]
bst_tda_kde_5.40.5_rf.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Same per-fold differences and the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.40.5_rf.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.5785
##
## $winRight
## [1] 0.4215
# Bayesian Correlated Test
# Correlated Bayesian t-test on the node-2 per-fold differences. The values
# 0.1, -0.01 and 0.01 are positional (presumably the fold correlation followed
# by the ROPE bounds -- confirm against the helper's definition).
bct_tda_kde_5.40.5_rf.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n2_3_fold
## $left
## [1] 0.05869393
##
## $rope
## [1] 0.3770915
##
## $right
## [1] 0.5642146
# Rope Plot
# Plot the rope() summary of the node-2 per-fold differences for the
# range [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.40.5_rf_n2_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_kde_5.40.5_rf.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold))
#bf_tda_kde_5.40.5_rf.n2_3_fold

# t_test
# One-sample t-test of the per-fold differences against a mean of 0.
t.test(
  as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_rf_n2_3_fold)
## t = 1.6387, df = 2, p-value = 0.2429
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01866521 0.04162865
## sample estimates:
## mean of x
## 0.01148172
### Test set diff
# Test-set accuracy gap: rf_cf_ov_acc (defined earlier in the file, presumably
# the baseline RF accuracy) minus the node-2 test accuracy. The result is
# negative when the node-2 value is larger.
diff_tda_kde_5.40.5_rf.n2_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n2_rf_cf0_ov_acc
diff_tda_kde_5.40.5_rf.n2_test
## Accuracy
## -0.04791155
## Bayesian Tests Test set diff
# Bayesian Sign Test
# NOTE(review): the input is a single difference, so the probabilities printed
# below rest on one observation only -- confirm this is intended.
bst_tda_kde_5.40.5_rf.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n2_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's left probability to its right probability; the
# ratio is Inf whenever probRight is 0, as in the recorded run.
bst_tda_kde_5.40.5_rf.n2_test_odds.left <-
  bst_tda_kde_5.40.5_rf.n2_test[["probLeft"]] /
  bst_tda_kde_5.40.5_rf.n2_test[["probRight"]]
bst_tda_kde_5.40.5_rf.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same single test-set difference and the same -0.01 / 0.01 bounds as the
# sign test above.
bsr_tda_kde_5.40.5_rf.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n2_test
## $winLeft
## [1] 0.8406333
##
## $winRope
## [1] 0.1593667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left, rope and right -- presumably because no spread can be
# estimated from one value. Confirm whether this call is meaningful here.
bct_tda_kde_5.40.5_rf.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.40.5_rf.n2_test))
# BayesFactor (disabled)
#bf_tda_kde_5.40.5_rf.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n2_test))
#bf_tda_kde_5.40.5_rf.n2_test
# t_test (disabled)
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n2_test))
##Node3
# Tune a random forest with caret on the node-3 TDA/KDE training subset,
# selecting mtry from rfGrid by cross-validated accuracy.
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# Argument names are case-sensitive, so the former `Importance = T` was
# silently swallowed and ignored: summary() of the fit reported importanceSD
# as NULL. `preProcess` is also spelled out instead of relying on partial
# matching of `preProc`.
# NOTE(review): the run log shows "invalid mtry: reset to within valid range"
# with 108 predictors, so rfGrid values above 108 presumably collapse to the
# same fit - consider trimming rfGrid where it is defined.
Adult_TDA_KDE_5.40.5_n3_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
# Display the fitted caret model: resampling setup plus accuracy and kappa
# for every mtry value in the tuning grid.
Adult_TDA_KDE_5.40.5_n3_RfFit0
## Random Forest
##
## 10351 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6901, 6901, 6900
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8282292 0.5644916
## 100 0.8256209 0.5577431
## 150 0.8257176 0.5582549
## 200 0.8263937 0.5600762
## 250 0.8269736 0.5618642
## 300 0.8262973 0.5598900
## 350 0.8267804 0.5605272
## 400 0.8249448 0.5561782
## 450 0.8256208 0.5577533
## 500 0.8254278 0.5574561
## 550 0.8250415 0.5564941
## 600 0.8257177 0.5579884
## 650 0.8268768 0.5608959
## 700 0.8265870 0.5604204
## 750 0.8252347 0.5570562
## 800 0.8268766 0.5615544
## 850 0.8258142 0.5580689
## 900 0.8262972 0.5598700
## 950 0.8273599 0.5617098
## 1000 0.8251382 0.5572343
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_KDE_5.40.5_n3_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8304348 0.5733312 Fold1
## 2 0.8281657 0.5637203 Fold3
## 3 0.8260870 0.5564232 Fold2
# Keep the accuracy column as a one-column data frame for the fold comparison.
# NOTE(review): rows are listed as Fold1, Fold3, Fold2, so a later row-wise
# subtraction pairs folds by position, not by fold label - verify that the
# baseline resample uses the same order.
ad_tda_kde_5.40.5_n3_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n3_RfFit0[["resample"]]["Accuracy"]
# Component overview of the fitted object
summary(Adult_TDA_KDE_5.40.5_n3_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 10351 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 20702 matrix numeric
## oob.times 10351 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 10351 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the 25 highest-ranked predictors.
# The title previously misspelled "Assisted" as "Assited"; the feature count
# is now passed by name instead of by position.
vip(Adult_TDA_KDE_5.40.5_n3_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n3_RfFit TDA-Assisted RF")

# Score the test set with the node-3 model fitted on the training data
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n3_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the test-set labels to assess fit
ad_tda_kde_5.40.5_n3_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7074 600
## >50K 342 1752
##
## Accuracy : 0.9036
## 95% CI : (0.8975, 0.9093)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.726
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9539
## Specificity : 0.7449
## Pos Pred Value : 0.9218
## Neg Pred Value : 0.8367
## Prevalence : 0.7592
## Detection Rate : 0.7242
## Detection Prevalence : 0.7856
## Balanced Accuracy : 0.8494
##
## 'Positive' Class : <=50K
##
# Display the node-3 confusion matrix (the same object is also printed right
# after it is created).
ad_tda_kde_5.40.5_n3_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7074 600
## >50K 342 1752
##
## Accuracy : 0.9036
## 95% CI : (0.8975, 0.9093)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.726
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9539
## Specificity : 0.7449
## Pos Pred Value : 0.9218
## Neg Pred Value : 0.8367
## Prevalence : 0.7592
## Detection Rate : 0.7242
## Detection Prevalence : 0.7856
## Balanced Accuracy : 0.8494
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-3 confusion matrix
ad_tda_kde_5.40.5_n3_rf_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 9.035627e-01 7.259706e-01 8.975366e-01 9.093468e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.867234e-295 5.592576e-17
# Keep the accuracy entry (first element of `overall`) for the comparison
ad_tda_kde_5.40.5_n3_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n3_rf_cf0[["overall"]]["Accuracy"]
# Class-level statistics
ad_tda_kde_5.40.5_n3_rf_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9538835 0.7448980 0.9218139
## Neg Pred Value Precision Recall
## 0.8366762 0.9218139 0.9538835
## F1 Prevalence Detection Rate
## 0.9375746 0.7592138 0.7242015
## Detection Prevalence Balanced Accuracy
## 0.7856265 0.8493907
# Keep precision, recall and F1 (elements 5 to 7 of `byClass`)
ad_tda_kde_5.40.5_n3_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-wise gap between the two sets of per-fold accuracies
# (ad_rf_fit_re minus the node-3 TDA-assisted resample accuracies).
diff_tda_kde_5.40.5_rf_n3_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n3_rf_fit0_re
diff_tda_kde_5.40.5_rf_n3_3_fold
## Accuracy
## 1 0.03319560
## 2 0.02942835
## 3 0.02440001
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold differences, ROPE limits -0.01 and 0.01
bst_tda_kde_5.40.5_rf.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Sign-test odds of "left" versus "right" (probLeft / probRight)
bst_tda_kde_5.40.5_rf.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n3_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n3_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold differences, ROPE limits -0.01 and 0.01
bsr_tda_kde_5.40.5_rf.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0084
##
## $winRight
## [1] 0.9916
# Bayesian correlated t-test on the per-fold accuracy differences, with a
# region of practical equivalence of [-0.01, 0.01].
# The second argument is the correlation between cross-validation results.
# The usual heuristic (Nadeau & Bengio; Corani & Benavoli) is rho = 1 / k for
# k-fold CV. The former hard-coded 0.1 is the 10-fold value, while this input
# holds one difference per fold of a 3-fold CV, so rho is now derived from the
# number of differences.
# NOTE(review): assumes a single, non-repeated k-fold run - confirm against
# fitControl. The printed result that follows was produced with rho = 0.1 and
# needs a re-run to refresh.
bct_tda_kde_5.40.5_rf.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold),
  1 / nrow(as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold)),
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n3_3_fold
## $left
## [1] 0.002819873
##
## $rope
## [1] 0.008743332
##
## $right
## [1] 0.9884368
# ROPE plot: share of the fold differences lying inside [-0.01, 0.01]
plot(
  rope(
    diff_tda_kde_5.40.5_rf_n3_3_fold,
    range = c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.40.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold))
#bf_tda_kde_5.40.5_rf.n3_3_fold
# Frequentist counterpart: one-sample t-test of the mean difference against 0
t.test(x = as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_rf_n3_3_fold)
## t = 11.386, df = 2, p-value = 0.007626
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.01804590 0.03997008
## sample estimates:
## mean of x
## 0.02900799
### Test set diff
# Single test-set accuracy gap: rf_cf_ov_acc minus the node-3 TDA-assisted
# RF accuracy (presumably rf_cf_ov_acc is the non-TDA baseline - confirm).
diff_tda_kde_5.40.5_rf.n3_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n3_rf_cf0_ov_acc
diff_tda_kde_5.40.5_rf.n3_test
## Accuracy
## -0.04463554
## Bayesian Tests Test set diff
# Bayesian sign test on the test-set gap, ROPE limits -0.01 and 0.01
bst_tda_kde_5.40.5_rf.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n3_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right" (probLeft / probRight).
# The ratio is Inf whenever probRight is 0 and probLeft is positive.
bst_tda_kde_5.40.5_rf.n3_test_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n3_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n3_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set gap, ROPE limits -0.01 and 0.01
bsr_tda_kde_5.40.5_rf.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n3_test
## $winLeft
## [1] 0.8372667
##
## $winRope
## [1] 0.1627333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set gap (rho 0.1, ROPE -0.01..0.01).
# NOTE(review): the input is a single number and the printed result is NA for
# left/rope/right - presumably because no spread can be estimated from one
# observation. Confirm whether this call should be kept.
bct_tda_kde_5.40.5_rf.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n3_test))
#BayesFactor
#bf_tda_kde_5.40.5_rf.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n3_test)) #bf_tda_kde_5.40.5_rf.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n3_test))
##Node4
# Fit the node-4 TDA-assisted random forest with caret::train(), tuning mtry
# over rfGrid under the resampling scheme in fitControl, with predictors
# centred and scaled.
#
# `importance = TRUE` is forwarded through train()'s `...` to randomForest().
# R argument names are case-sensitive, so the former `Importance = T` never
# reached randomForest's `importance` argument: the recorded summary of the
# fitted model shows importanceSD as NULL, i.e. no permutation importance was
# computed.
#
# NOTE(review): the recorded warnings report zero-variance predictors and
# "invalid mtry: reset to within valid range"; the recorded fit has 108
# predictors while the printed grid goes up to mtry = 1000. rfGrid is defined
# elsewhere -- consider capping it at the number of predictors.
Adult_TDA_KDE_5.40.5_n4_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
# Show the tuning results and the selected mtry for the node-4 model
print(Adult_TDA_KDE_5.40.5_n4_RfFit0)
## Random Forest
##
## 8741 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5828, 5827, 5827
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8557372 0.5207916
## 100 0.8533348 0.5155632
## 150 0.8528771 0.5153671
## 200 0.8523052 0.5125524
## 250 0.8520763 0.5117217
## 300 0.8519622 0.5103528
## 350 0.8542499 0.5181504
## 400 0.8515043 0.5103713
## 450 0.8529916 0.5143107
## 500 0.8529915 0.5145521
## 550 0.8527628 0.5155377
## 600 0.8527627 0.5150852
## 650 0.8525339 0.5122389
## 700 0.8519620 0.5105925
## 750 0.8519620 0.5114700
## 800 0.8517330 0.5087312
## 850 0.8512755 0.5100794
## 900 0.8526485 0.5148234
## 950 0.8531060 0.5161048
## 1000 0.8517331 0.5098221
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy/Kappa of the selected node-4 model
Adult_TDA_KDE_5.40.5_n4_RfFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8547889 0.5179049 Fold1
## 2 0.8544955 0.5191210 Fold3
## 3 0.8579272 0.5253489 Fold2
# Keep the Accuracy column (the first column) as a one-column data frame.
# NOTE(review): rows are not sorted by fold label (Fold1, Fold3, Fold2 above);
# the later row-wise subtraction from ad_rf_fit_re pairs folds by position --
# confirm the reference model's resample rows are in the same order.
ad_tda_kde_5.40.5_n4_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n4_RfFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n4_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 8741 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 17482 matrix numeric
## oob.times 8741 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 8741 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot for the node-4 model; 25 is passed as vip()'s
# second positional argument. Title typo "TDA-Assited" corrected.
vip(Adult_TDA_KDE_5.40.5_n4_RfFit0, 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n4_RfFit TDA-Assisted RF")

# Score the held-out test data with the node-4 model fitted on training data
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.40.5_n4_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
print(ad_tda_kde_5.40.5_n4_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6857 684
## >50K 559 1668
##
## Accuracy : 0.8727
## 95% CI : (0.866, 0.8793)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6455
##
## Mcnemar's Test P-Value : 0.0004363
##
## Sensitivity : 0.9246
## Specificity : 0.7092
## Pos Pred Value : 0.9093
## Neg Pred Value : 0.7490
## Prevalence : 0.7592
## Detection Rate : 0.7020
## Detection Prevalence : 0.7720
## Balanced Accuracy : 0.8169
##
## 'Positive' Class : <=50K
##
# Print the node-4 confusion matrix (same object as displayed just above)
print(ad_tda_kde_5.40.5_n4_rf_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6857 684
## >50K 559 1668
##
## Accuracy : 0.8727
## 95% CI : (0.866, 0.8793)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6455
##
## Mcnemar's Test P-Value : 0.0004363
##
## Sensitivity : 0.9246
## Specificity : 0.7092
## Pos Pred Value : 0.9093
## Neg Pred Value : 0.7490
## Prevalence : 0.7592
## Detection Rate : 0.7020
## Detection Prevalence : 0.7720
## Balanced Accuracy : 0.8169
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-4 confusion matrix
print(ad_tda_kde_5.40.5_n4_rf_cf0[["overall"]])
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.727477e-01 6.455193e-01 8.659765e-01 8.792959e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.430507e-174 4.362707e-04
# Keep the overall test accuracy (first element, named "Accuracy")
ad_tda_kde_5.40.5_n4_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_rf_cf0[["overall"]]["Accuracy"]
# Per-class statistics
print(ad_tda_kde_5.40.5_n4_rf_cf0[["byClass"]])
## Sensitivity Specificity Pos Pred Value
## 0.9246224 0.7091837 0.9092958
## Neg Pred Value Precision Recall
## 0.7489897 0.9092958 0.9246224
## F1 Prevalence Detection Rate
## 0.9168951 0.7592138 0.7019861
## Detection Prevalence Balanced Accuracy
## 0.7720106 0.8169031
# Keep Precision, Recall and F1 (elements 5 to 7 of byClass)
ad_tda_kde_5.40.5_n4_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_rf_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-wise difference of per-fold accuracies: reference RF minus node-4 model
diff_tda_kde_5.40.5_rf_n4_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n4_rf_fit0_re
diff_tda_kde_5.40.5_rf_n4_3_fold
## Accuracy
## 1 0.008841503
## 2 0.003098565
## 3 -0.007440278
## Bayesian Tests 3-fold diff
# Bayesian sign test on the node-4 fold differences; -0.01 and 0.01 are
# presumably the ROPE bounds (confirm against BayesianSignTest()).
bst_tda_kde_5.40.5_rf.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold),
  -0.01,
  0.01
)
print(bst_tda_kde_5.40.5_rf.n4_3_fold)
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the Bayesian sign test (node 4, 3-fold).
# NOTE(review): probLeft and probRight are both 0 in the recorded run, so this
# ratio prints NaN.
bst_tda_kde_5.40.5_rf.n4_3_fold_odds.left <-
  bst_tda_kde_5.40.5_rf.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_rf.n4_3_fold[["probRight"]]
print(bst_tda_kde_5.40.5_rf.n4_3_fold_odds.left)
## [1] NaN
# Bayesian signed-rank test on the node-4 fold differences; -0.01 and 0.01 are
# presumably the ROPE bounds (confirm against BayesianSignedRank()).
bsr_tda_kde_5.40.5_rf.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.40.5_rf.n4_3_fold)
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-4 fold differences. The trailing
# positional arguments (0.1, -0.01, 0.01) are presumably the fold correlation
# and the ROPE bounds; confirm against correlatedBayesianTtest().
bct_tda_kde_5.40.5_rf.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.40.5_rf.n4_3_fold)
## $left
## [1] 0.08596089
##
## $rope
## [1] 0.7827507
##
## $right
## [1] 0.1312884
# ROPE plot of the node-4 fold differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_rf_n4_3_fold, range = c(-0.01, 0.01))
)

### Test set diff
# Test-set accuracy of the reference RF minus that of the node-4
# TDA-assisted RF; a negative value means the TDA-assisted model scored higher.
diff_tda_kde_5.40.5_rf.n4_test <-
  rf_cf_ov_acc - ad_tda_kde_5.40.5_n4_rf_cf0_ov_acc
print(diff_tda_kde_5.40.5_rf.n4_test)
## Accuracy
## -0.01382064
## Bayesian Tests Test set diff
# Bayesian sign test on the node-4 test-set difference; -0.01 and 0.01 are
# presumably the ROPE bounds (confirm against BayesianSignTest()).
bst_tda_kde_5.40.5_rf.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n4_test),
  -0.01,
  0.01
)
print(bst_tda_kde_5.40.5_rf.n4_test)
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test (node 4, test-set difference)
# The odds computation now follows its heading directly; it was previously
# separated from it by the 3-fold t-test further down.
# NOTE(review): probRight is 0 in the recorded run, so this ratio prints Inf.
bst_tda_kde_5.40.5_rf.n4_test_odds.left <-
  bst_tda_kde_5.40.5_rf.n4_test$probLeft /
  bst_tda_kde_5.40.5_rf.n4_test$probRight
bst_tda_kde_5.40.5_rf.n4_test_odds.left
## [1] Inf
# The Bayes-factor and t-test steps below use the 3-fold differences
# (diff_tda_kde_5.40.5_rf_n4_3_fold), not the test-set difference.
#BayesFactor
#bf_tda_kde_5.40.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold))
#bf_tda_kde_5.40.5_rf.n4_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_rf_n4_3_fold)
## t = 0.31461, df = 2, p-value = 0.7828
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.01901352 0.02201338
## sample estimates:
## mean of x
## 0.00149993
# Bayesian signed-rank test on the node-4 test-set difference; -0.01 and 0.01
# are presumably the ROPE bounds (confirm against BayesianSignedRank()).
bsr_tda_kde_5.40.5_rf.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n4_test),
  -0.01,
  0.01
)
print(bsr_tda_kde_5.40.5_rf.n4_test)
## $winLeft
## [1] 0.5420333
##
## $winRope
## [1] 0.4579667
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-4 test-set difference.
# NOTE(review): the input is a single difference and the recorded result is NA
# for left/rope/right -- presumably the test needs more than one observation;
# confirm against correlatedBayesianTtest().
bct_tda_kde_5.40.5_rf.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n4_test),
  0.1,
  -0.01,
  0.01
)
print(bct_tda_kde_5.40.5_rf.n4_test)
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n4_test))
#BayesFactor
#bf_tda_kde_5.40.5_rf.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n4_test))
#bf_tda_kde_5.40.5_rf.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n4_test))
##Node5
# Fit the node-5 TDA-assisted random forest with caret, tuning mtry over rfGrid
# under the resampling scheme in fitControl and selecting on Accuracy.
#
# Fix: the original passed `Importance = T`. randomForest's argument is the
# lower-case `importance`, so the capitalised name was silently absorbed by
# `...` and no permutation importance was computed (the recorded summary shows
# importanceSD as NULL). It is now `importance = TRUE`, and the partially
# matched `preProc` is spelled out as `preProcess`.
#
# NOTE(review): the recorded warnings report "invalid mtry: reset to within
# valid range"; rfGrid goes up to 1000 while the fit has 108 predictors --
# consider capping the grid where rfGrid is defined.
# NOTE(review): the recorded warnings also list zero-variance predictors during
# centering/scaling -- consider whether they should be dropped beforehand.
Adult_TDA_KDE_5.40.5_n5_RfFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n5.vec,
  importance = TRUE,
  method = "rf",
  trControl = fitControl,
  tuneGrid = rfGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Hungary
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th,
## V4.Assoc.acdm, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.France, V14.Holand.Netherlands, V14.Honduras
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands,
## V14.Scotland
## Warning in randomForest.default(x, y, mtry = param$mtry, ...): invalid mtry:
## reset to within valid range
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.12th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Bachelors,
## V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
# Print the fitted caret model: sample/predictor counts, pre-processing,
# resampling scheme, and Accuracy/Kappa for each mtry value in the grid
Adult_TDA_KDE_5.40.5_n5_RfFit0
## Random Forest
##
## 6628 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 4418, 4419, 4419
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 50 0.8735676 0.4123904
## 100 0.8690414 0.3973959
## 150 0.8707014 0.4083172
## 200 0.8705502 0.4076269
## 250 0.8731153 0.4191783
## 300 0.8707012 0.4079601
## 350 0.8696450 0.4025312
## 400 0.8707014 0.4041953
## 450 0.8711541 0.4068876
## 500 0.8714556 0.4118756
## 550 0.8707008 0.4057698
## 600 0.8713050 0.4069658
## 650 0.8702485 0.4035178
## 700 0.8714559 0.4113993
## 750 0.8707014 0.4058444
## 800 0.8719084 0.4115080
## 850 0.8688906 0.4002628
## 900 0.8707010 0.4011834
## 950 0.8714557 0.4096678
## 1000 0.8691925 0.4013203
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 50.
# Per-fold Accuracy / Kappa stored on the fit
Adult_TDA_KDE_5.40.5_n5_RfFit0$resample
## Accuracy Kappa Resample
## 1 0.8674208 0.3928947 Fold1
## 2 0.8750566 0.4271266 Fold3
## 3 0.8782254 0.4171498 Fold2
# Keep only the Accuracy column (the first column) as a one-column data frame
ad_tda_kde_5.40.5_n5_rf_fit0_re <-
  Adult_TDA_KDE_5.40.5_n5_RfFit0[["resample"]]["Accuracy"]
# Structure summary of the fitted object
summary(Adult_TDA_KDE_5.40.5_n5_RfFit0)
## Length Class Mode
## call 5 -none- call
## type 1 -none- character
## predicted 6628 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 13256 matrix numeric
## oob.times 6628 -none- numeric
## classes 2 -none- character
## importance 108 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 6628 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## param 1 -none- list
# Variable-importance plot of the top 25 features for the node-5 fit.
# Fix: the plot title misspelled "Assisted" as "Assited".
vip(Adult_TDA_KDE_5.40.5_n5_RfFit0, num_features = 25) +
  ggtitle("Adult_TDA_KDE_5.40.5_n5_RfFit TDA-Assisted RF")

# Score the held-out test rows with the node-5 fit
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_RfFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the test-set labels in a confusion matrix
ad_tda_kde_5.40.5_n5_rf_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6836 896
## >50K 580 1456
##
## Accuracy : 0.8489
## 95% CI : (0.8416, 0.8559)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5668
##
## Mcnemar's Test P-Value : 2.421e-16
##
## Sensitivity : 0.9218
## Specificity : 0.6190
## Pos Pred Value : 0.8841
## Neg Pred Value : 0.7151
## Prevalence : 0.7592
## Detection Rate : 0.6998
## Detection Prevalence : 0.7916
## Balanced Accuracy : 0.7704
##
## 'Positive' Class : <=50K
##
# Prints the node-5 confusion matrix a second time; the recorded output is
# identical to the print immediately after the object was created.
ad_tda_kde_5.40.5_n5_rf_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6836 896
## >50K 580 1456
##
## Accuracy : 0.8489
## 95% CI : (0.8416, 0.8559)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5668
##
## Mcnemar's Test P-Value : 2.421e-16
##
## Sensitivity : 0.9218
## Specificity : 0.6190
## Pos Pred Value : 0.8841
## Neg Pred Value : 0.7151
## Prevalence : 0.7592
## Detection Rate : 0.6998
## Detection Prevalence : 0.7916
## Balanced Accuracy : 0.7704
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-5 confusion matrix
ad_tda_kde_5.40.5_n5_rf_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.488943e-01 5.668402e-01 8.416367e-01 8.559433e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.008218e-105 2.421481e-16
# Keep the overall accuracy (the first entry of $overall)
ad_tda_kde_5.40.5_n5_rf_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_rf_cf0$overall["Accuracy"]
# Per-class statistics of the node-5 confusion matrix
ad_tda_kde_5.40.5_n5_rf_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9217907 0.6190476 0.8841180
## Neg Pred Value Precision Recall
## 0.7151277 0.8841180 0.9217907
## F1 Prevalence Detection Rate
## 0.9025614 0.7592138 0.6998362
## Detection Prevalence Balanced Accuracy
## 0.7915643 0.7704192
# Keep precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_kde_5.40.5_n5_rf_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_rf_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted RF vs. tda-assisted RF classifiers
### 3-fold diff
# Row-wise difference between the two per-fold accuracy tables.
# NOTE(review): rows are paired by position, and the node-5 resample table is
# recorded in the order Fold1, Fold3, Fold2 -- confirm ad_rf_fit_re uses the
# same row order, otherwise folds are mismatched.
diff_tda_kde_5.40.5_rf_n5_3_fold <-
  ad_rf_fit_re - ad_tda_kde_5.40.5_n5_rf_fit0_re
diff_tda_kde_5.40.5_rf_n5_3_fold
## Accuracy
## 1 -0.003790434
## 2 -0.017462483
## 3 -0.027738471
## Bayesian Tests 3-fold diff
# Bayesian sign test on the n5 3-fold differences; the bounds -0.01 and 0.01
# are passed positionally, unchanged
bst_tda_kde_5.40.5_rf.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n5_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight; the recorded result is Inf because
# probRight is 0.
bst_tda_kde_5.40.5_rf.n5_3_fold_odds.left <-
  bst_tda_kde_5.40.5_rf.n5_3_fold[["probLeft"]] /
    bst_tda_kde_5.40.5_rf.n5_3_fold[["probRight"]]
bst_tda_kde_5.40.5_rf.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the n5 3-fold differences; the bounds -0.01
# and 0.01 are passed positionally, unchanged
bsr_tda_kde_5.40.5_rf.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n5_3_fold
## $winLeft
## [1] 0.7847667
##
## $winRope
## [1] 0.2152333
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the n5 3-fold differences.
# The three trailing values are passed positionally, unchanged
# (presumably rho = 0.1 and ROPE bounds -0.01 / 0.01 -- confirm against
# the definition of correlatedBayesianTtest).
bct_tda_kde_5.40.5_rf.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n5_3_fold
## $left
## [1] 0.7439305
##
## $rope
## [1] 0.2153731
##
## $right
## [1] 0.04069636
# ROPE plot of the n5 3-fold differences, using the interval c(-0.01, 0.01)
plot(
  rope(
    diff_tda_kde_5.40.5_rf_n5_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_kde_5.40.5_rf.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold))
#bf_tda_kde_5.40.5_rf.n5_3_fold
#t_test
# One-sample t-test of the n5 3-fold differences (default null mean of 0)
t.test(x = as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_rf_n5_3_fold)
## t = -2.3543, df = 2, p-value = 0.1428
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.04617510 0.01351418
## sample estimates:
## mean of x
## -0.01633046
### Test set diff
# Single difference of the stored test-set accuracies
# (non-TDA RF minus TDA-assisted RF).
diff_tda_kde_5.40.5_rf.n5_test <- rf_cf_ov_acc - ad_tda_kde_5.40.5_n5_rf_cf0_ov_acc
diff_tda_kde_5.40.5_rf.n5_test
## Accuracy
## 0.01003276
## Bayesian tests on the test-set difference
# Bayesian sign test; the input here is a single difference value.
bst_tda_kde_5.40.5_rf.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_rf.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_rf.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the test-set sign test (probLeft / probRight).
bst_tda_kde_5.40.5_rf.n5_test_odds.left <- with(
  bst_tda_kde_5.40.5_rf.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_rf.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_kde_5.40.5_rf.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_rf.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_rf.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4589333
##
## $winRight
## [1] 0.5410667
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the result printed below is NA for left/rope/right, presumably
# because only one difference value is supplied — confirm whether this call
# should be kept.
bct_tda_kde_5.40.5_rf.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_rf.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_rf.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_rf.n5_test))
#BayesFactor
#bf_tda_kde_5.40.5_rf.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_rf.n5_test))
#bf_tda_kde_5.40.5_rf.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_rf.n5_test))
## Non-TDA-assisted baseline
# Tuning grid for the radial-basis SVM: three sigma values crossed with five
# cost values (0.25, 0.50, ..., 1.25).
svmGrid <- expand.grid(sigma = c(0.1, 1, 10), C = (1:5 * 0.25))
# Support Vector Machine - Radial Basis
# Fit on the one-hot-encoded training data. Resampling is controlled by
# `fitControl`, predictors are centred and scaled, and the grid point with the
# largest Accuracy is selected.
# `TRUE` is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
# NOTE(review): `Importance` (capital I) is not a named `train()` argument and is
# forwarded through `...`; it is unclear whether the SVM fit uses it — confirm or drop.
adultSvmFit <- train(as.factor(adult_df1) ~ ., data = adult.one_hot_df4Train,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid, preProc = c("center", "scale"),
  metric = "Accuracy")
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted object: resampling results for every grid point and the
# tuning values selected for the final model.
adultSvmFit
## Support Vector Machines with Radial Basis Function Kernel
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15195, 15196
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8074847 0.3500101868
## 0.1 0.50 0.8165665 0.4055675167
## 0.1 0.75 0.8211732 0.4325583188
## 0.1 1.00 0.8231914 0.4476493440
## 0.1 1.25 0.8238056 0.4561167589
## 1.0 0.25 0.7781336 0.1411353617
## 1.0 0.50 0.7882245 0.2207762591
## 1.0 0.75 0.7932698 0.2634153629
## 1.0 1.00 0.7972622 0.2991704557
## 1.0 1.25 0.7980081 0.3157845092
## 10.0 0.25 0.7592243 0.0002766655
## 10.0 0.50 0.7604089 0.0153264999
## 10.0 0.75 0.7637871 0.0462168622
## 10.0 1.00 0.7667705 0.0813937482
## 10.0 1.25 0.7680867 0.1017111305
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold resampling results of the selected model.
adultSvmFit[["resample"]]
## Accuracy Kappa Resample
## 1 0.8239010 0.4624931 Fold1
## 2 0.8253258 0.4597737 Fold3
## 3 0.8221901 0.4460835 Fold2
# Keep only the first column (Accuracy) for the fold-wise comparisons.
# NOTE(review): rows stay in the order printed above (Fold1, Fold3, Fold2), so any
# later row-wise arithmetic pairs rows by position, not by fold label.
ad_svm_fit_re <- adultSvmFit[["resample"]][1]
# Summarise the fitted object.
summary(adultSvmFit)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot (disabled)
# vip(adultSvmFit, 25) + ggtitle("non-TDA-Assited Svm")
# Score the test set with the tuned SVM.
predictions <- predict(adultSvmFit, newdata = adult.one_hot_df4Test)
# Cross-tabulate the predictions against the observed test labels.
svm_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
svm_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6941 1215
## >50K 475 1137
##
## Accuracy : 0.827
## 95% CI : (0.8193, 0.8344)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4698
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9359
## Specificity : 0.4834
## Pos Pred Value : 0.8510
## Neg Pred Value : 0.7053
## Prevalence : 0.7592
## Detection Rate : 0.7106
## Detection Prevalence : 0.8350
## Balanced Accuracy : 0.7097
##
## 'Positive' Class : <=50K
##
# Overall statistics of the baseline SVM confusion matrix.
svm_cf[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.269861e-01 4.698380e-01 8.193364e-01 8.344402e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.336799e-59 2.986040e-72
# Keep the first entry (Accuracy) for the test-set comparison, then show the
# per-class metrics.
svm_cf_ov_acc <- svm_cf[["overall"]][1]
svm_cf[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9359493 0.4834184 0.8510299
## Neg Pred Value Precision Recall
## 0.7053350 0.8510299 0.9359493
## F1 Prevalence Detection Rate
## 0.8914719 0.7592138 0.7105856
## Detection Prevalence Balanced Accuracy
## 0.8349713 0.7096838
# Entries 5-7 of $byClass are Precision, Recall and F1 in the printed output.
svm_cf_pr_rec_f1 <- svm_cf$byClass[5:7]
## With TDA PCA filter 5 intervals, 50% overlap, 5 bins
# NOTE(review): the object names carry `5.40.5`; confirm whether the overlap is
# 40% or 50%.
## Node1
# Radial-basis SVM tuned over `svmGrid` with the same settings as the baseline fit.
# `TRUE` is spelled out because the shorthand `T` is an ordinary variable that
# can be reassigned.
# NOTE(review): this section is labelled Node1 and the model is named `_n1_`, but
# the training data is `tda.m_adult_5.40.5.n2.vec`, the same object the Node2 fit
# further down uses — confirm whether an n1 data set was intended.
# NOTE(review): `Importance` (capital I) is forwarded through `...`; it is unclear
# whether the SVM fit uses it — confirm or drop.
Adult_TDA_PC_5.40.5_n1_SvmFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_adult_5.40.5.n2.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid, preProc = c("center", "scale"),
  metric = "Accuracy")
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Laos
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted object: resampling results for every grid point and the
# tuning values selected for the final model.
Adult_TDA_PC_5.40.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 10276 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6851, 6851, 6850
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.6835340 0.27566297
## 0.1 0.50 0.6937518 0.31603664
## 0.1 0.75 0.6959906 0.32800238
## 0.1 1.00 0.6956015 0.33313031
## 0.1 1.25 0.6946285 0.33439397
## 1.0 0.25 0.6414947 0.12102319
## 1.0 0.50 0.6639735 0.20347703
## 1.0 0.75 0.6703967 0.23720347
## 1.0 1.00 0.6742897 0.25948107
## 1.0 1.25 0.6742895 0.26935552
## 10.0 0.25 0.6145387 0.01539684
## 10.0 0.50 0.6220319 0.04604963
## 10.0 0.75 0.6260217 0.07144294
## 10.0 1.00 0.6322499 0.10247763
## 10.0 1.25 0.6360453 0.12540423
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 0.75.
# Per-fold resampling results of the selected model.
Adult_TDA_PC_5.40.5_n1_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.6919708 0.3151232 Fold1
## 2 0.6995620 0.3403526 Fold2
## 3 0.6964390 0.3285313 Fold3
# Keep only the first column (Accuracy) for the fold-wise comparisons.
ad_tda_pc_5.40.5_n1_svm_fit_re <- Adult_TDA_PC_5.40.5_n1_SvmFit0[["resample"]][1]
# Summarise the fitted object.
summary(Adult_TDA_PC_5.40.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot (disabled)
# vip(Adult_TDA_PC_5.40.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n1_SvmFit TDA-Assited Svm")
# Score the full test set with Adult_TDA_PC_5.40.5_n1_SvmFit0.
pred0 <- predict(Adult_TDA_PC_5.40.5_n1_SvmFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.40.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 920 267
## >50K 6496 2085
##
## Accuracy : 0.3076
## 95% CI : (0.2985, 0.3169)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0055
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.12406
## Specificity : 0.88648
## Pos Pred Value : 0.77506
## Neg Pred Value : 0.24298
## Prevalence : 0.75921
## Detection Rate : 0.09419
## Detection Prevalence : 0.12152
## Balanced Accuracy : 0.50527
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion-matrix object that was already
# printed right after it was created, so the output is repeated — confirm
# whether the second print is intended.
ad_tda_pc_5.40.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 920 267
## >50K 6496 2085
##
## Accuracy : 0.3076
## 95% CI : (0.2985, 0.3169)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0055
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.12406
## Specificity : 0.88648
## Pos Pred Value : 0.77506
## Neg Pred Value : 0.24298
## Prevalence : 0.75921
## Detection Rate : 0.09419
## Detection Prevalence : 0.12152
## Balanced Accuracy : 0.50527
##
## 'Positive' Class : <=50K
##
# Overall statistics of the TDA-assisted SVM confusion matrix.
ad_tda_pc_5.40.5_n1_svm_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.307637183 0.005532804 0.298492123 0.316897065 0.759213759
## AccuracyPValue McnemarPValue
## 1.000000000 0.000000000
# Keep the first entry (Accuracy) for the test-set comparison, then show the
# per-class metrics.
ad_tda_pc_5.40.5_n1_svm_cf0_ov_acc <- ad_tda_pc_5.40.5_n1_svm_cf0[["overall"]][1]
ad_tda_pc_5.40.5_n1_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.12405609 0.88647959 0.77506318
## Neg Pred Value Precision Recall
## 0.24297867 0.77506318 0.12405609
## F1 Prevalence Detection Rate
## 0.21387888 0.75921376 0.09418509
## Detection Prevalence Balanced Accuracy
## 0.12151925 0.50526784
# Entries 5-7 of $byClass are Precision, Recall and F1 in the printed output.
ad_tda_pc_5.40.5_n1_svm_cf0_pre_rec_f1 <- ad_tda_pc_5.40.5_n1_svm_cf0[["byClass"]][5:7]

###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the stored per-fold accuracies (non-TDA SVM minus
# TDA-assisted SVM); positive values mean the non-TDA SVM scored higher.
# NOTE(review): the subtraction pairs rows by position. The two $resample tables
# printed earlier list their folds in different orders (Fold1, Fold3, Fold2 vs.
# Fold1, Fold2, Fold3), so rows 2 and 3 combine differently labelled folds —
# confirm this is acceptable.
diff_tda_pca_5.40.5_svm_n1_3_fold <- ad_svm_fit_re - ad_tda_pc_5.40.5_n1_svm_fit_re
diff_tda_pca_5.40.5_svm_n1_3_fold
## Accuracy
## 1 0.1319302
## 2 0.1257637
## 3 0.1257511
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE bounds
# (the result carries a $probRope entry) — confirm against the helper's definition.
bst_tda_pca_5.40.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds of "left" against "right" from the sign test (probLeft / probRight).
bst_tda_pca_5.40.5_svm.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_svm.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the same 3-fold differences, same -0.01 / 0.01 bounds.
bsr_tda_pca_5.40.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009833333
##
## $winRight
## [1] 0.9901667
# Bayesian correlated t-test on the 3-fold differences.
# NOTE(review): 0.1 is passed as the second argument. If that argument is the
# between-fold correlation, the usual heuristic is 1/k (1/3 for 3-fold CV) —
# confirm against the helper's definition.
bct_tda_pca_5.40.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n1_3_fold
## $left
## [1] 0.0001485419
##
## $rope
## [1] 5.467953e-05
##
## $right
## [1] 0.9997968
# ROPE plot of the 3-fold differences over the interval [-0.01, 0.01]
plot(rope(diff_tda_pca_5.40.5_svm_n1_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
# bf_tda_pca_5.40.5_svm.n1_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold))
# bf_tda_pca_5.40.5_svm.n1_3_fold
# One-sample t-test of the fold differences, default settings
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_svm_n1_3_fold)
## t = 62.118, df = 2, p-value = 0.0002591
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1189618 0.1366682
## sample estimates:
## mean of x
## 0.127815
### Test set diff
# Single difference of the stored test-set accuracies
# (non-TDA SVM minus TDA-assisted SVM).
diff_tda_pca_5.40.5_svm.n1_test <- svm_cf_ov_acc - ad_tda_pc_5.40.5_n1_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n1_test
## Accuracy
## 0.5193489
## Bayesian tests on the test-set difference
# Bayesian sign test; the input here is a single difference value.
bst_tda_pca_5.40.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the test-set sign test (probLeft / probRight).
bst_tda_pca_5.40.5_svm.n1_test_odds.left <- with(
  bst_tda_pca_5.40.5_svm.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_svm.n1_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.40.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1622667
##
## $winRight
## [1] 0.8377333
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the result printed below is NA for left/rope/right, presumably
# because only one difference value is supplied — confirm whether this call
# should be kept.
bct_tda_pca_5.40.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_svm.n1_test))
#BayesFactor
#bf_tda_pca_5.40.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n1_test))
#bf_tda_pca_5.40.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n1_test))
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node2
# Fit a radial-kernel SVM ("svmRadial") on the node-2 training subset.
# Resampling is governed by `fitControl` and the candidate parameters by
# `svmGrid`, both defined outside this section; Accuracy picks the winner.
# NOTE(review): the run log for this call reports zero-variance dummy columns
# that cannot be scaled. Putting "zv" ahead of "center"/"scale" would drop
# them, but that changes the fitted model, so it is left for the author.
Adult_TDA_PC_5.40.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  Importance = TRUE, # was `T`, an ordinary variable that can be reassigned
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name instead of partial `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Dominican.Republic, V14.Guatemala,
## V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Nicaragua
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Armed.Forces, V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.Preschool,
## V7.Priv.house.serv, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the resampling/tuning summary of the node-2 radial SVM fit
Adult_TDA_PC_5.40.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 10276 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6851, 6850, 6851
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.6796422 0.26891199
## 0.1 0.50 0.6914171 0.31073716
## 0.1 0.75 0.6943368 0.32414095
## 0.1 1.00 0.6958940 0.33335086
## 0.1 1.25 0.6987162 0.34249154
## 1.0 0.25 0.6406186 0.11861340
## 1.0 0.50 0.6662127 0.20951112
## 1.0 0.75 0.6698140 0.23681241
## 1.0 1.00 0.6711760 0.25299882
## 1.0 1.25 0.6750690 0.27153478
## 10.0 0.25 0.6150252 0.01691059
## 10.0 0.50 0.6189177 0.03798253
## 10.0 0.75 0.6235885 0.06403264
## 10.0 1.00 0.6301083 0.09590707
## 10.0 1.25 0.6333205 0.11641877
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold Accuracy and Kappa of the selected node-2 model
Adult_TDA_PC_5.40.5_n2_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.7127007 0.3696479 Fold1
## 2 0.6928467 0.3287552 Fold3
## 3 0.6906013 0.3290715 Fold2
# Keep only the Accuracy column, as a one-column data frame.
# NOTE(review): the rows above are listed Fold1, Fold3, Fold2, and the
# subtraction further down pairs rows by position -- confirm that
# ad_svm_fit_re is in the same fold order.
ad_tda_pc_5.40.5_n2_svm_fit_re <- Adult_TDA_PC_5.40.5_n2_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.40.5_n2_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.40.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n2_SvmFit TDA-Assited Svm")
# Score the held-out test rows with the node-2 SVM fit
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels
ad_tda_pc_5.40.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 981 271
## >50K 6435 2081
##
## Accuracy : 0.3135
## 95% CI : (0.3043, 0.3228)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.009
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1323
## Specificity : 0.8848
## Pos Pred Value : 0.7835
## Neg Pred Value : 0.2444
## Prevalence : 0.7592
## Detection Rate : 0.1004
## Detection Prevalence : 0.1282
## Balanced Accuracy : 0.5085
##
## 'Positive' Class : <=50K
##
# Second print of the node-2 confusion matrix (same object as printed above)
ad_tda_pc_5.40.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 981 271
## >50K 6435 2081
##
## Accuracy : 0.3135
## 95% CI : (0.3043, 0.3228)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.009
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1323
## Specificity : 0.8848
## Pos Pred Value : 0.7835
## Neg Pred Value : 0.2444
## Prevalence : 0.7592
## Detection Rate : 0.1004
## Detection Prevalence : 0.1282
## Balanced Accuracy : 0.5085
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-2 confusion matrix
ad_tda_pc_5.40.5_n2_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.3134726 0.0090039 0.3042784 0.3227781 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Keep the (named) overall test accuracy; Accuracy is the first entry above
ad_tda_pc_5.40.5_n2_svm_cf0_ov_acc <- ad_tda_pc_5.40.5_n2_svm_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.40.5_n2_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.1322816 0.8847789 0.7835463
## Neg Pred Value Precision Recall
## 0.2443636 0.7835463 0.1322816
## F1 Prevalence Detection Rate
## 0.2263498 0.7592138 0.1004300
## Detection Prevalence Balanced Accuracy
## 0.1281736 0.5085302
# Precision, Recall and F1 are entries 5 to 7 of the vector printed above
ad_tda_pc_5.40.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Fold-by-fold accuracy gap: ad_svm_fit_re minus the node-2 TDA-assisted SVM.
# Both operands are one-column data frames, so rows are paired by position.
diff_tda_pca_5.40.5_svm_n2_3_fold <- ad_svm_fit_re - ad_tda_pc_5.40.5_n2_svm_fit_re
diff_tda_pca_5.40.5_svm_n2_3_fold
## Accuracy
## 1 0.1112003
## 2 0.1324791
## 3 0.1315888
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.40.5_svm.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability
bst_tda_pca_5.40.5_svm.n2_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n2_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_svm.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0091
##
## $winRight
## [1] 0.9909
# Bayesian Correlated Test
# The 0.1 is presumably the assumed correlation between folds -- confirm
# against the definition of correlatedBayesianTtest().
bct_tda_pca_5.40.5_svm.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n2_3_fold
## $left
## [1] 0.001754917
##
## $rope
## [1] 0.000658136
##
## $right
## [1] 0.9975869
# Rope Plot
plot(
  rope(diff_tda_pca_5.40.5_svm_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
# bf_tda_pca_5.40.5_svm.n2_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold))
# bf_tda_pca_5.40.5_svm.n2_3_fold
# One-sample t-test of the same differences
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_svm_n2_3_fold)
## t = 18, df = 2, p-value = 0.003072
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.09518898 0.15498978
## sample estimates:
## mean of x
## 0.1250894
### Test set diff
# Test-set accuracy gap: svm_cf_ov_acc (presumably the non-TDA SVM) minus the
# node-2 TDA-assisted SVM.
# NOTE(review): the result printed below is a single value, so every test in
# this section runs on one observation only.
diff_tda_pca_5.40.5_svm.n2_test <- svm_cf_ov_acc - ad_tda_pc_5.40.5_n2_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n2_test
## Accuracy
## 0.5135135
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.40.5_svm.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of the "left" probability to the "right" probability
bst_tda_pca_5.40.5_svm.n2_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n2_test[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n2_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_svm.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1605
##
## $winRight
## [1] 0.8395
# Bayesian Correlated Test
# NOTE(review): every component comes back NA here, presumably because a
# spread cannot be estimated from a single difference -- confirm.
bct_tda_pca_5.40.5_svm.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
# plot(rope(diff_tda_pca_5.40.5_svm.n2_test))
# Bayes factor (disabled)
# bf_tda_pca_5.40.5_svm.n2_test <- ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n2_test))
# bf_tda_pca_5.40.5_svm.n2_test
# t-test (disabled)
# t.test(as.matrix(diff_tda_pca_5.40.5_svm.n2_test))
##Node3
# Fit a radial-kernel SVM ("svmRadial") on the node-3 training subset.
# Resampling is governed by `fitControl` and the candidate parameters by
# `svmGrid`, both defined outside this section; Accuracy picks the winner.
# NOTE(review): the run log for this call reports zero-variance dummy columns
# that cannot be scaled. Putting "zv" ahead of "center"/"scale" would drop
# them, but that changes the fitted model, so it is left for the author.
Adult_TDA_PC_5.40.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  Importance = TRUE, # was `T`, an ordinary variable that can be reassigned
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # full name instead of partial `preProc`
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands, V14.Hong
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the resampling/tuning summary of the node-3 radial SVM fit
Adult_TDA_PC_5.40.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11563 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7710, 7708, 7708
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8126789 0.1841480591
## 0.1 0.50 0.8240082 0.2699248824
## 0.1 0.75 0.8270352 0.3061764819
## 0.1 1.00 0.8283324 0.3284044138
## 0.1 1.25 0.8270351 0.3366648606
## 1.0 0.25 0.7947764 0.0246909491
## 1.0 0.50 0.7993600 0.0632990660
## 1.0 0.75 0.8014356 0.0935180998
## 1.0 1.00 0.8024734 0.1232879248
## 1.0 1.25 0.8033378 0.1481303310
## 10.0 0.25 0.7918361 0.0006577511
## 10.0 0.50 0.7922684 0.0044211695
## 10.0 0.75 0.7927008 0.0110370893
## 10.0 1.00 0.7914033 0.0159858010
## 10.0 1.25 0.7905386 0.0212200622
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold Accuracy and Kappa of the selected node-3 model
Adult_TDA_PC_5.40.5_n3_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8318194 0.3469628 Fold1
## 2 0.8246433 0.3181618 Fold2
## 3 0.8285344 0.3200886 Fold3
# Keep only the Accuracy column, as a one-column data frame
ad_tda_pc_5.40.5_n3_svm_fit_re <- Adult_TDA_PC_5.40.5_n3_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_PC_5.40.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.40.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n3_SvmFit TDA-Assited Svm")
# Score the held-out test data with the node-3 SVM
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels
ad_tda_pc_5.40.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6671 1834
## >50K 745 518
##
## Accuracy : 0.736
## 95% CI : (0.7271, 0.7447)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1423
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8995
## Specificity : 0.2202
## Pos Pred Value : 0.7844
## Neg Pred Value : 0.4101
## Prevalence : 0.7592
## Detection Rate : 0.6829
## Detection Prevalence : 0.8707
## Balanced Accuracy : 0.5599
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6671 1834
## >50K 745 518
##
## Accuracy : 0.736
## 95% CI : (0.7271, 0.7447)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1423
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.8995
## Specificity : 0.2202
## Pos Pred Value : 0.7844
## Neg Pred Value : 0.4101
## Prevalence : 0.7592
## Detection Rate : 0.6829
## Detection Prevalence : 0.8707
## Balanced Accuracy : 0.5599
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n3_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.359746e-01 1.422701e-01 7.271121e-01 7.446962e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 7.960558e-102
# Store the test-set accuracy (the first, "Accuracy", entry of $overall)
ad_tda_pc_5.40.5_n3_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_svm_cf0$overall["Accuracy"]
# Show the per-class statistics
ad_tda_pc_5.40.5_n3_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8995415 0.2202381 0.7843621
## Neg Pred Value Precision Recall
## 0.4101346 0.7843621 0.8995415
## F1 Prevalence Detection Rate
## 0.8380127 0.7592138 0.6829443
## Detection Prevalence Balanced Accuracy
## 0.8707002 0.5598898
# Keep precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.40.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_svm_fit_re minus the node-3 TDA-assisted fit.
# NOTE(review): ad_svm_fit_re is presumably the non-TDA SVM's fold accuracies;
# confirm against where it is defined.
diff_tda_pca_5.40.5_svm_n3_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n3_svm_fit_re
diff_tda_pca_5.40.5_svm_n3_3_fold
## Accuracy
## 1 -0.0079183350
## 2 0.0006824661
## 3 -0.0063443209
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the fold differences, with bounds -0.01 and 0.01
bst_tda_pca_5.40.5_svm.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of probLeft to probRight.
# The ratio is NaN when both probabilities are 0 and Inf when only
# probRight is 0.
bst_tda_pca_5.40.5_svm.n3_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n3_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n3_3_fold_odds.left
## [1] NaN
# Bayesian Signed Rank Test on the fold differences, bounds -0.01 and 0.01
bsr_tda_pca_5.40.5_svm.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; the last two are the -0.01 / 0.01 bounds.
bct_tda_pca_5.40.5_svm.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n3_3_fold
## $left
## [1] 0.1074374
##
## $rope
## [1] 0.8718418
##
## $right
## [1] 0.02072074
# Rope Plot: share of the fold differences falling inside [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.40.5_svm_n3_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.40.5_rf.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold))
#bf_tda_pca_5.40.5_rf.n3_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_svm_n3_3_fold)
## t = -1.7121, df = 2, p-value = 0.229
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.015902663 0.006849203
## sample estimates:
## mean of x
## -0.00452673
### Test set diff
# Single test-set accuracy difference: svm_cf_ov_acc minus the node-3 fit
diff_tda_pca_5.40.5_svm.n3_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n3_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n3_test
## Accuracy
## 0.09101147
## Bayesian Tests Test set diff
# Bayesian Sign Test on the one test-set difference, bounds -0.01 and 0.01
bst_tda_pca_5.40.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of probLeft to probRight
bst_tda_pca_5.40.5_svm.n3_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n3_test[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n3_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the one test-set difference
bsr_tda_pca_5.40.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1599667
##
## $winRight
## [1] 0.8400333
# Bayesian Correlated Test on the one test-set difference.
# With a single difference the printed result is NA for left, rope and right.
bct_tda_pca_5.40.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_svm.n3_test)))
#BayesFactor
#bf_tda_pca_5.40.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n3_test)) #bf_tda_pca_5.40.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n3_test))
##Node4
# Fit a radial-basis-kernel SVM with caret on tda.m_adult_5.40.5.n4.vec.
# Tuning runs over svmGrid under the resampling scheme in fitControl,
# predictors are centred and scaled, and accuracy selects the final model.
# `Importance` is forwarded through train()'s `...`.
# NOTE(review): presumably a leftover from the random-forest fits; confirm
# the SVM backend uses it at all.
# NOTE(review): this fit emits repeated zero-variance warnings from
# preProcess; consider adding "zv" to preProc if dropping constant
# predictors is acceptable for the analysis.
Adult_TDA_PC_5.40.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  Importance = TRUE, # TRUE, not T: T is an ordinary variable that can be reassigned
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.40.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 14818 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9878, 9879, 9879
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.9574841 0.011349211
## 0.1 0.50 0.9571468 0.054829463
## 0.1 0.75 0.9572143 0.081524431
## 0.1 1.00 0.9575518 0.104946524
## 0.1 1.25 0.9570794 0.110595159
## 1.0 0.25 0.9574167 0.016688355
## 1.0 0.50 0.9576866 0.049913574
## 1.0 0.75 0.9577541 0.058076344
## 1.0 1.00 0.9578891 0.076265471
## 1.0 1.25 0.9577541 0.087911776
## 10.0 0.25 0.9574167 0.000000000
## 10.0 0.50 0.9574842 0.008707248
## 10.0 0.75 0.9574167 0.014188683
## 10.0 1.00 0.9573492 0.019694916
## 10.0 1.25 0.9570118 0.024382482
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 1 and C = 1.
Adult_TDA_PC_5.40.5_n4_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.9572874 0.09422544 Fold1
## 2 0.9584936 0.08369420 Fold2
## 3 0.9578862 0.05087677 Fold3
# Keep only the per-fold accuracy column of the resampling results
# (still a one-column data frame, as with positional indexing).
ad_tda_pc_5.40.5_n4_svm_fit_re <-
  Adult_TDA_PC_5.40.5_n4_SvmFit0$resample["Accuracy"]
# Summarise the fitted node-4 model object
summary(Adult_TDA_PC_5.40.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.40.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n4_SvmFit TDA-Assited Svm")
# Score the held-out test data with the node-4 SVM
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels
ad_tda_pc_5.40.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7413 2211
## >50K 3 141
##
## Accuracy : 0.7733
## 95% CI : (0.7649, 0.7816)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.00053
##
## Kappa : 0.0876
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.99960
## Specificity : 0.05995
## Pos Pred Value : 0.77026
## Neg Pred Value : 0.97917
## Prevalence : 0.75921
## Detection Rate : 0.75891
## Detection Prevalence : 0.98526
## Balanced Accuracy : 0.52977
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7413 2211
## >50K 3 141
##
## Accuracy : 0.7733
## 95% CI : (0.7649, 0.7816)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.00053
##
## Kappa : 0.0876
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.99960
## Specificity : 0.05995
## Pos Pred Value : 0.77026
## Neg Pred Value : 0.97917
## Prevalence : 0.75921
## Detection Rate : 0.75891
## Detection Prevalence : 0.98526
## Balanced Accuracy : 0.52977
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n4_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7733415233 0.0876324358 0.7649068502 0.7816129147 0.7592137592
## AccuracyPValue McnemarPValue
## 0.0005299571 0.0000000000
# Store the test-set accuracy (the first, "Accuracy", entry of $overall)
ad_tda_pc_5.40.5_n4_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_svm_cf0$overall["Accuracy"]
# Show the per-class statistics
ad_tda_pc_5.40.5_n4_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.99959547 0.05994898 0.77026185
## Neg Pred Value Precision Recall
## 0.97916667 0.77026185 0.99959547
## F1 Prevalence Detection Rate
## 0.87007042 0.75921376 0.75890663
## Detection Prevalence Balanced Accuracy
## 0.98525799 0.52977222
# Keep precision, recall and F1 (entries 5 to 7 of $byClass)
ad_tda_pc_5.40.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_svm_fit_re minus the node-4 TDA-assisted fit.
# NOTE(review): ad_svm_fit_re is presumably the non-TDA SVM's fold accuracies;
# confirm against where it is defined.
diff_tda_pca_5.40.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n4_svm_fit_re
diff_tda_pca_5.40.5_svm_n4_3_fold
## Accuracy
## 1 -0.1333864
## 2 -0.1331678
## 3 -0.1356962
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the fold differences, with bounds -0.01 and 0.01
bst_tda_pca_5.40.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: ratio of probLeft to probRight.
# The ratio is Inf when probRight is 0 and probLeft is positive.
bst_tda_pca_5.40.5_svm.n4_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n4_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the fold differences, bounds -0.01 and 0.01
bsr_tda_pca_5.40.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n4_3_fold
## $winLeft
## [1] 0.99
##
## $winRope
## [1] 0.01
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds; the last two are the -0.01 / 0.01 bounds.
bct_tda_pca_5.40.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n4_3_fold
## $left
## [1] 0.9999717
##
## $rope
## [1] 7.31671e-06
##
## $right
## [1] 2.10061e-05
# Rope Plot: share of the fold differences falling inside [-0.01, 0.01]
plot(
  rope(diff_tda_pca_5.40.5_svm_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.40.5_rf.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold))
#bf_tda_pca_5.40.5_rf.n4_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of zero
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_svm_n4_3_fold)
## t = -165.78, df = 2, p-value = 3.638e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1375635 -0.1306034
## sample estimates:
## mean of x
## -0.1340835
### Test set diff
# Single test-set accuracy difference: svm_cf_ov_acc minus the node-4 fit
diff_tda_pca_5.40.5_svm.n4_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n4_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n4_test
## Accuracy
## 0.05364455
## Bayesian Tests Test set diff
# Bayesian Sign Test on the one test-set difference, bounds -0.01 and 0.01
bst_tda_pca_5.40.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: ratio of probLeft to probRight
bst_tda_pca_5.40.5_svm.n4_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n4_test[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n4_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the one test-set difference
bsr_tda_pca_5.40.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1574333
##
## $winRight
## [1] 0.8425667
# Bayesian Correlated Test on the one test-set difference.
# With a single difference the printed result is NA for left, rope and right.
bct_tda_pca_5.40.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_svm.n4_test)))
#BayesFactor
#bf_tda_pca_5.40.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n4_test)) #bf_tda_pca_5.40.5_svm.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n4_test))
##Node5
# Fit a radial-basis-kernel SVM with caret on tda.m_adult_5.40.5.n5.vec.
# Tuning runs over svmGrid under the resampling scheme in fitControl,
# predictors are centred and scaled, and accuracy selects the final model.
# `Importance` is forwarded through train()'s `...`.
# NOTE(review): presumably a leftover from the random-forest fits; confirm
# the SVM backend uses it at all.
# NOTE(review): this fit emits repeated zero-variance warnings from
# preProcess; consider adding "zv" to preProc if dropping constant
# predictors is acceptable for the analysis.
Adult_TDA_PC_5.40.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  Importance = TRUE, # TRUE, not T: T is an ordinary variable that can be reassigned
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProc = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.Doctorate, V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V8.Husband
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
Adult_TDA_PC_5.40.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 12081 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8054, 8053, 8055
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.9996689 0
## 0.1 0.50 0.9996689 0
## 0.1 0.75 0.9996689 0
## 0.1 1.00 0.9996689 0
## 0.1 1.25 0.9996689 0
## 1.0 0.25 0.9996689 0
## 1.0 0.50 0.9996689 0
## 1.0 0.75 0.9996689 0
## 1.0 1.00 0.9996689 0
## 1.0 1.25 0.9996689 0
## 10.0 0.25 0.9996689 0
## 10.0 0.50 0.9996689 0
## 10.0 0.75 0.9996689 0
## 10.0 1.00 0.9996689 0
## 10.0 1.25 0.9996689 0
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 10 and C = 0.25.
Adult_TDA_PC_5.40.5_n5_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.9997517 0 Fold1
## 2 0.9995035 0 Fold2
## 3 0.9997516 0 Fold3
# Keep only the per-fold accuracy column of the resampling results
# (still a one-column data frame, as with positional indexing).
ad_tda_pc_5.40.5_n5_svm_fit_re <-
  Adult_TDA_PC_5.40.5_n5_SvmFit0$resample["Accuracy"]
# Summarise the fitted node-5 model object
summary(Adult_TDA_PC_5.40.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_PC_5.40.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_PCA_5.40.5_n5_SvmFit TDA-Assited Svm")
# Predict outcome using Adult_TDA_PC_5.40.5_n5_SvmFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_PC_5.40.5_n5_SvmFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_pc_5.40.5_n5_svm_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_pc_5.40.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Second display of the same confusion-matrix object; the output is identical
# to the print directly above.
ad_tda_pc_5.40.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the confusion matrix
ad_tda_pc_5.40.5_n5_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the (named) overall accuracy for the test-set comparison
ad_tda_pc_5.40.5_n5_svm_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics for the 'positive' class
ad_tda_pc_5.40.5_n5_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, recall and F1 are entries 5 to 7 of byClass; select them by name
ad_tda_pc_5.40.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Fold-wise accuracy gap: non-TDA fit minus TDA-assisted (PCA filter) fit.
# Negative entries mean the TDA-assisted fit scored higher on that fold.
diff_tda_pca_5.40.5_svm_n5_3_fold <-
  ad_svm_fit_re - ad_tda_pc_5.40.5_n5_svm_fit_re
diff_tda_pca_5.40.5_svm_n5_3_fold
## Accuracy
## 1 -0.1758506
## 2 -0.1741777
## 3 -0.1775616
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# (-0.01 and 0.01 are presumably the ROPE limits, as in the rope() plot call)
bst_tda_pca_5.40.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test; the ratio is Inf
# whenever probRight is 0, as it is here
bst_tda_pca_5.40.5_svm.n5_3_fold_odds.left <-
  bst_tda_pca_5.40.5_svm.n5_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n5_3_fold[["probRight"]]
bst_tda_pca_5.40.5_svm.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the same fold-wise differences
bsr_tda_pca_5.40.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n5_3_fold
## $winLeft
## [1] 0.9919667
##
## $winRope
## [1] 0.008033333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): 0.1 is presumably the assumed correlation between folds and
# -0.01 / 0.01 the ROPE limits -- confirm against the helper's definition.
bct_tda_pca_5.40.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n5_3_fold
## $left
## [1] 0.9999769
##
## $rope
## [1] 4.708338e-06
##
## $right
## [1] 1.841464e-05
# ROPE plot of the fold-wise differences over the range [-0.01, 0.01]
plot(
  rope(
    diff_tda_pca_5.40.5_svm_n5_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_svm.n5_3_fold <- ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold))
#bf_tda_pca_5.40.5_svm.n5_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_svm_n5_3_fold)
## t = -180.03, df = 2, p-value = 3.085e-05
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1800664 -0.1716602
## sample estimates:
## mean of x
## -0.1758633
### Test set diff
# Test-set accuracy gap: non-TDA fit minus TDA-assisted (PCA filter) fit.
# This is a single number, not one value per fold.
diff_tda_pca_5.40.5_svm.n5_test <-
  svm_cf_ov_acc - ad_tda_pc_5.40.5_n5_svm_cf0_ov_acc
diff_tda_pca_5.40.5_svm.n5_test
## Accuracy
## 0.06777232
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference
bst_tda_pca_5.40.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_svm.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_svm.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the test-set sign test
bst_tda_pca_5.40.5_svm.n5_test_odds.left <-
  bst_tda_pca_5.40.5_svm.n5_test[["probLeft"]] /
  bst_tda_pca_5.40.5_svm.n5_test[["probRight"]]
bst_tda_pca_5.40.5_svm.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference
bsr_tda_pca_5.40.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_svm.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_svm.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1561667
##
## $winRight
## [1] 0.8438333
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single value and the printed result is NA for
# left/rope/right -- presumably because no spread can be estimated from one
# observation; confirm whether this call is meaningful here.
bct_tda_pca_5.40.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_svm.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_svm.n5_test))
#BayesFactor
#bf_tda_pca_5.40.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_svm.n5_test)) #bf_tda_pca_5.40.5_svm.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_svm.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit a radial-basis SVM on the KDE-filter node-1 data, tuning over svmGrid
# with the resampling scheme in fitControl and selecting by accuracy.
# Predictors are centered and scaled before fitting.
# NOTE(review): `Importance` is not a named train() argument, so it is
# forwarded through `...`; it looks like a carry-over from a random-forest
# call -- confirm it has any effect for method = 'svmRadial'.
# NOTE(review): the zero-variance warnings emitted by this call come from
# constant dummy columns in this subset; adding 'zv' to preProcess would drop
# them but would likely change the fitted model, so it is left as is.
Adult_TDA_KDE_5.40.5_n1_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model: resampling setup, accuracy/kappa per tuning-grid row,
# and the sigma/C values that were finally selected.
Adult_TDA_KDE_5.40.5_n1_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11838 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7892, 7893, 7891
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8002198 0.35293821
## 0.1 0.50 0.8086672 0.40635211
## 0.1 0.75 0.8121308 0.43444500
## 0.1 1.00 0.8125533 0.44556808
## 0.1 1.25 0.8117085 0.44916807
## 1.0 0.25 0.7590813 0.11678424
## 1.0 0.50 0.7716675 0.19359877
## 1.0 0.75 0.7791863 0.24762956
## 1.0 1.00 0.7827345 0.27866952
## 1.0 1.25 0.7845083 0.30034452
## 10.0 0.25 0.7405812 0.00000000
## 10.0 0.50 0.7412569 0.00633962
## 10.0 0.75 0.7453960 0.03531713
## 10.0 1.00 0.7502112 0.06976379
## 10.0 1.25 0.7508026 0.08347198
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.
# Per-fold resampling results stored on the fitted object
Adult_TDA_KDE_5.40.5_n1_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8193107 0.4667671 Fold1
## 2 0.8124208 0.4454626 Fold2
## 3 0.8059286 0.4244746 Fold3
# Keep only the Accuracy column (still a one-column data frame) for the
# fold-wise comparison further down
ad_tda_kde_5.40.5_n1_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n1_SvmFit0$resample["Accuracy"]
# Summarise the KDE fit of this section (the original call pointed at the
# PCA-filter model Adult_TDA_PC_5.40.5_n1_SvmFit0 by mistake)
summary(Adult_TDA_KDE_5.40.5_n1_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.40.5_n1_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n1_SvmFit TDA-Assisted Svm")
# Score the testing data with the model fitted on the training subset
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels
ad_tda_kde_5.40.5_n1_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7044 1093
## >50K 372 1259
##
## Accuracy : 0.85
## 95% CI : (0.8428, 0.857)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5418
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9498
## Specificity : 0.5353
## Pos Pred Value : 0.8657
## Neg Pred Value : 0.7719
## Prevalence : 0.7592
## Detection Rate : 0.7211
## Detection Prevalence : 0.8330
## Balanced Accuracy : 0.7426
##
## 'Positive' Class : <=50K
##
# Second display of the same confusion-matrix object; the output is identical
# to the print directly above.
ad_tda_kde_5.40.5_n1_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7044 1093
## >50K 372 1259
##
## Accuracy : 0.85
## 95% CI : (0.8428, 0.857)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5418
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9498
## Specificity : 0.5353
## Pos Pred Value : 0.8657
## Neg Pred Value : 0.7719
## Prevalence : 0.7592
## Detection Rate : 0.7211
## Detection Prevalence : 0.8330
## Balanced Accuracy : 0.7426
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the confusion matrix
ad_tda_kde_5.40.5_n1_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.500205e-01 5.418375e-01 8.427843e-01 8.570472e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.658854e-108 6.127894e-79
# Keep the (named) overall accuracy for the test-set comparison
ad_tda_kde_5.40.5_n1_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_svm_cf0$overall["Accuracy"]
# Per-class statistics for the 'positive' class
ad_tda_kde_5.40.5_n1_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9498382 0.5352891 0.8656753
## Neg Pred Value Precision Recall
## 0.7719191 0.8656753 0.9498382
## F1 Prevalence Detection Rate
## 0.9058060 0.7592138 0.7211302
## Detection Prevalence Balanced Accuracy
## 0.8330262 0.7425637
# Precision, recall and F1 are entries 5 to 7 of byClass; select them by name
ad_tda_kde_5.40.5_n1_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Fold-wise accuracy gap: non-TDA fit minus TDA-assisted (KDE filter) fit.
# Positive entries mean the non-TDA fit scored higher on that fold.
diff_tda_kde_5.40.5_svm_n1_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.40.5_n1_svm_fit_re
diff_tda_kde_5.40.5_svm_n1_3_fold
## Accuracy
## 1 0.004590332
## 2 0.012905001
## 3 0.016261497
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# (-0.01 and 0.01 are presumably the ROPE limits, as in the rope() plot call)
bst_tda_kde_5.40.5_svm.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test
bst_tda_kde_5.40.5_svm.n1_3_fold_odds.left <-
  bst_tda_kde_5.40.5_svm.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_svm.n1_3_fold[["probRight"]]
bst_tda_kde_5.40.5_svm.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the same fold-wise differences
bsr_tda_kde_5.40.5_svm.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4883
##
## $winRight
## [1] 0.5117
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): 0.1 is presumably the assumed correlation between folds and
# -0.01 / 0.01 the ROPE limits -- confirm against the helper's definition.
bct_tda_kde_5.40.5_svm.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_svm.n1_3_fold
## $left
## [1] 0.01686917
##
## $rope
## [1] 0.3752071
##
## $right
## [1] 0.6079237
# ROPE plot of the fold-wise differences over the range [-0.01, 0.01]
plot(
  rope(
    diff_tda_kde_5.40.5_svm_n1_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_svm.n1_3_fold <- ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold))
#bf_tda_kde_5.40.5_svm.n1_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_svm_n1_3_fold)
## t = 3.2436, df = 2, p-value = 0.08334
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.003673783 0.026178336
## sample estimates:
## mean of x
## 0.01125228
### Test set diff
# Test-set accuracy gap: non-TDA fit minus TDA-assisted (KDE filter) fit.
# This is a single number, not one value per fold.
diff_tda_kde_5.40.5_svm.n1_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n1_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n1_test
## Accuracy
## -0.0230344
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference
bst_tda_kde_5.40.5_svm.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_svm.n1_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the test-set sign test; the ratio is Inf
# whenever probRight is 0, as it is here
bst_tda_kde_5.40.5_svm.n1_test_odds.left <-
  bst_tda_kde_5.40.5_svm.n1_test[["probLeft"]] /
  bst_tda_kde_5.40.5_svm.n1_test[["probRight"]]
bst_tda_kde_5.40.5_svm.n1_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the single test-set difference
bsr_tda_kde_5.40.5_svm.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_svm.n1_test
## $winLeft
## [1] 0.8404333
##
## $winRope
## [1] 0.1595667
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference.
# NOTE(review): the input is a single value and the printed result is NA for
# left/rope/right -- presumably because no spread can be estimated from one
# observation; confirm whether this call is meaningful here.
bct_tda_kde_5.40.5_svm.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_svm.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n1_test))
#BayesFactor
#bf_tda_kde_5.40.5_svm.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n1_test)) #bf_tda_kde_5.40.5_svm.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n1_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node2
# Fit a radial-basis SVM on the KDE-filter node-2 data, tuning over svmGrid
# with the resampling scheme in fitControl and selecting by accuracy.
# Predictors are centered and scaled before fitting.
# NOTE(review): `Importance` is not a named train() argument, so it is
# forwarded through `...`; it looks like a carry-over from a random-forest
# call -- confirm it has any effect for method = 'svmRadial'.
# NOTE(review): the zero-variance warnings emitted by this call come from
# constant dummy columns in this subset; adding 'zv' to preProcess would drop
# them but would likely change the fitted model, so it is left as is.
Adult_TDA_KDE_5.40.5_n2_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n2.vec,
  Importance = TRUE,
  method = 'svmRadial',
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c('center', 'scale'),
  metric = 'Accuracy'
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.1st.4th, V4.5th.6th, V4.7th.8th,
## V4.9th, V4.Doctorate, V4.Preschool
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the caret fit summary for the n2 SVM: sample/predictor counts,
# pre-processing, resampling scheme, accuracy/kappa for every sigma/C
# candidate, and the finally selected sigma and C.
Adult_TDA_KDE_5.40.5_n2_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 11203 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7468, 7469, 7469
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7882714 0.3760350510
## 0.1 0.50 0.7997861 0.4351806953
## 0.1 0.75 0.8021071 0.4536716548
## 0.1 1.00 0.8032677 0.4651262148
## 0.1 1.25 0.8038032 0.4710841314
## 1.0 0.25 0.7457826 0.1474372360
## 1.0 0.50 0.7630102 0.2421037553
## 1.0 0.75 0.7704190 0.2888410584
## 1.0 1.00 0.7737216 0.3180011267
## 1.0 1.25 0.7723825 0.3261746724
## 10.0 0.25 0.7194502 0.0004574013
## 10.0 0.50 0.7231992 0.0217294972
## 10.0 0.75 0.7275730 0.0543398943
## 10.0 1.00 0.7320360 0.0891427200
## 10.0 1.25 0.7332856 0.1076412483
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold accuracy/kappa of the selected sigma/C combination.
Adult_TDA_KDE_5.40.5_n2_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7962517 0.4489763 Fold1
## 2 0.8031601 0.4727827 Fold3
## 3 0.8119979 0.4914934 Fold2
# Keep only the fold accuracies (still a one-column data frame) for the
# fold-wise comparison that follows later in the script.
ad_tda_kde_5.40.5_n2_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n2_SvmFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n2_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot, currently disabled.
#vip(Adult_TDA_KDE_5.40.5_n2_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n2_SvmFit TDA-Assited Svm")
# Score the test data with the fitted n2 model.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels and print
# the resulting confusion matrix with its summary statistics.
ad_tda_kde_5.40.5_n2_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7044 1046
## >50K 372 1306
##
## Accuracy : 0.8548
## 95% CI : (0.8477, 0.8618)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5599
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9498
## Specificity : 0.5553
## Pos Pred Value : 0.8707
## Neg Pred Value : 0.7783
## Prevalence : 0.7592
## Detection Rate : 0.7211
## Detection Prevalence : 0.8282
## Balanced Accuracy : 0.7526
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; the recorded output below
# is identical to the first print.
ad_tda_kde_5.40.5_n2_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7044 1046
## >50K 372 1306
##
## Accuracy : 0.8548
## 95% CI : (0.8477, 0.8618)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5599
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9498
## Specificity : 0.5553
## Pos Pred Value : 0.8707
## Neg Pred Value : 0.7783
## Prevalence : 0.7592
## Detection Rate : 0.7211
## Detection Prevalence : 0.8282
## Balanced Accuracy : 0.7526
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the n2 confusion matrix.
ad_tda_kde_5.40.5_n2_svm_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.548321e-01 5.598899e-01 8.476893e-01 8.617626e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.044745e-121 1.943359e-71
# Store the test accuracy (first element of `overall`) as a named scalar.
ad_tda_kde_5.40.5_n2_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics for the positive class.
ad_tda_kde_5.40.5_n2_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9498382 0.5552721 0.8707046
## Neg Pred Value Precision Recall
## 0.7783075 0.8707046 0.9498382
## F1 Prevalence Detection Rate
## 0.9085515 0.7592138 0.7211302
## Detection Prevalence Balanced Accuracy
## 0.8282146 0.7525551
# Store precision, recall and F1 (elements 5 to 7 of `byClass`).
ad_tda_kde_5.40.5_n2_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Fold-wise accuracy difference: `ad_svm_fit_re` minus the n2 TDA-assisted
# SVM's per-fold accuracies, so positive values mean `ad_svm_fit_re` scored
# higher. `ad_svm_fit_re` is defined outside this section (presumably the
# baseline SVM's `$resample["Accuracy"]` - confirm).
# NOTE(review): data-frame subtraction pairs rows by position, and the TDA
# fit's rows are ordered Fold1, Fold3, Fold2; confirm the baseline rows are in
# the same order and that the two models' folds are actually comparable (the
# TDA model was resampled on its own 11203-row training subset).
diff_tda_kde_5.40.5_svm_n2_3_fold<-(ad_svm_fit_re - ad_tda_kde_5.40.5_n2_svm_fit_re)
diff_tda_kde_5.40.5_svm_n2_3_fold
## Accuracy
## 1 0.02764935
## 2 0.02216564
## 3 0.01019219
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The last two arguments (-0.01, 0.01) are the same +/-0.01 bounds passed to
# every test below and to `rope()`; the helper itself is defined elsewhere.
bst_tda_kde_5.40.5_svm.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_svm.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign test above (0 / 0.75 here).
bst_tda_kde_5.40.5_svm.n2_3_fold_odds.left<-bst_tda_kde_5.40.5_svm.n2_3_fold$probLeft/bst_tda_kde_5.40.5_svm.n2_3_fold$probRight
bst_tda_kde_5.40.5_svm.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_svm.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_svm.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.03666667
##
## $winRight
## [1] 0.9633333
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho; for k-fold cross-validation the usual choice is 1/k, which would be 1/3
# for the 3-fold resampling used here - confirm 0.1 is intended.
bct_tda_kde_5.40.5_svm.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_svm.n2_3_fold
## $left
## [1] 0.01858525
##
## $rope
## [1] 0.09884104
##
## $right
## [1] 0.8825737
# Rope Plot
# NOTE(review): `rope()` is applied directly to the three fold differences
# with the same +/-0.01 range; confirm that summarising three raw values this
# way is intended.
plot(rope(diff_tda_kde_5.40.5_svm_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold))
#bf_tda_kde_5.40.5_svm.n2_3_fold
#t_test
# Frequentist one-sample t-test of the three fold differences against 0.
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_svm_n2_3_fold)
## t = 3.8808, df = 2, p-value = 0.06044
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.002174408 0.042179196
## sample estimates:
## mean of x
## 0.02000239
### Test set diff
# Test-set accuracy difference: `svm_cf_ov_acc` (defined outside this section,
# presumably the baseline SVM's test accuracy - confirm) minus the n2
# TDA-assisted SVM's test accuracy. This is a single number, so every test
# below runs on exactly one observation.
diff_tda_kde_5.40.5_svm.n2_test<-(svm_cf_ov_acc-ad_tda_kde_5.40.5_n2_svm_cf0_ov_acc)
diff_tda_kde_5.40.5_svm.n2_test
## Accuracy
## -0.02784603
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_svm.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_svm.n2_test),-0.01,0.01)
bst_tda_kde_5.40.5_svm.n2_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probRight is 0 for this single observation, so the ratio evaluates to Inf.
bst_tda_kde_5.40.5_svm.n2_test_odds.left<-bst_tda_kde_5.40.5_svm.n2_test$probLeft/bst_tda_kde_5.40.5_svm.n2_test$probRight
bst_tda_kde_5.40.5_svm.n2_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_svm.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_svm.n2_test),-0.01,0.01)
bsr_tda_kde_5.40.5_svm.n2_test
## $winLeft
## [1] 0.8419
##
## $winRope
## [1] 0.1581
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): with a single difference this call returns NA for left, rope
# and right (see the output below), presumably because no spread can be
# estimated from one value; the result carries no information.
bct_tda_kde_5.40.5_svm.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_svm.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_svm.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
# The rope plot, Bayes factor and t-test are disabled for the test-set diff.
#plot(rope(diff_tda_kde_5.40.5_svm.n2_test))
#BayesFactor
#bf_tda_kde_5.40.5_svm.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n2_test)) #bf_tda_kde_5.40.5_svm.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n2_test))
##Node3
# Tune a radial-basis SVM (caret method "svmRadial") on the node-3 training
# data `tda.m_kde_adult_5.40.5.n3.vec`, with `adult_df1` coerced to a factor
# as the response and all remaining columns as predictors. Predictors are
# centered and scaled; the resampling scheme (`fitControl`) and the sigma/C
# candidates (`svmGrid`) are defined outside this section. The candidate with
# the highest resampled accuracy is kept.
#
# NOTE(review): `Importance` is not a named caret::train argument, so it is
# forwarded through `...` to the underlying fitter; it looks like a leftover
# from the random-forest fits - confirm the SVM fitter ignores it.
# NOTE(review): this fit warns on every resample about zero-variance dummy
# columns that cannot be scaled. Adding "zv" to `preProcess` would presumably
# drop them, but that changes the fitted model, so it is left unchanged here.
Adult_TDA_KDE_5.40.5_n3_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Honduras
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the caret fit summary for the n3 SVM: sample/predictor counts,
# pre-processing, resampling scheme, accuracy/kappa for every sigma/C
# candidate, and the finally selected sigma and C.
Adult_TDA_KDE_5.40.5_n3_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 10351 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6901, 6900, 6901
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7800214 0.368004396
## 0.1 0.50 0.7915178 0.426074193
## 0.1 0.75 0.7922908 0.439452215
## 0.1 1.00 0.7938364 0.449779851
## 0.1 1.25 0.7938366 0.453990057
## 1.0 0.25 0.7421504 0.165743935
## 1.0 0.50 0.7541301 0.236100174
## 1.0 0.75 0.7647571 0.294722585
## 1.0 1.00 0.7691044 0.322811763
## 1.0 1.25 0.7692976 0.334532494
## 10.0 0.25 0.7148102 0.002029263
## 10.0 0.50 0.7175152 0.023948703
## 10.0 0.75 0.7235050 0.065507851
## 10.0 1.00 0.7296880 0.109367278
## 10.0 1.25 0.7305574 0.124985006
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold accuracy/kappa of the selected sigma/C combination.
Adult_TDA_KDE_5.40.5_n3_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7939130 0.4622528 Fold1
## 2 0.7959420 0.4506133 Fold3
## 3 0.7916546 0.4491040 Fold2
# Keep only the fold accuracies (still a one-column data frame) for the
# fold-wise comparison that follows later in the script.
ad_tda_kde_5.40.5_n3_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n3_SvmFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n3_SvmFit0)
## Length Class Mode
## 1 ksvm S4
# Variable-importance plot, currently disabled.
#vip(Adult_TDA_KDE_5.40.5_n3_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n3_SvmFit TDA-Assited Svm")
# Score the test data with the fitted n3 model.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n3_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels and print
# the resulting confusion matrix with its summary statistics.
ad_tda_kde_5.40.5_n3_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7009 1102
## >50K 407 1250
##
## Accuracy : 0.8455
## 95% CI : (0.8382, 0.8526)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5301
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9451
## Specificity : 0.5315
## Pos Pred Value : 0.8641
## Neg Pred Value : 0.7544
## Prevalence : 0.7592
## Detection Rate : 0.7175
## Detection Prevalence : 0.8304
## Balanced Accuracy : 0.7383
##
## 'Positive' Class : <=50K
##
# Second print of the same confusion-matrix object; the recorded output below
# is identical to the first print.
ad_tda_kde_5.40.5_n3_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7009 1102
## >50K 407 1250
##
## Accuracy : 0.8455
## 95% CI : (0.8382, 0.8526)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5301
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9451
## Specificity : 0.5315
## Pos Pred Value : 0.8641
## Neg Pred Value : 0.7544
## Prevalence : 0.7592
## Detection Rate : 0.7175
## Detection Prevalence : 0.8304
## Balanced Accuracy : 0.7383
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the n3 confusion matrix.
ad_tda_kde_5.40.5_n3_svm_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.455160e-01 5.300580e-01 8.381947e-01 8.526305e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.272474e-97 2.190137e-71
# Store the test accuracy (first element of `overall`) as a named scalar.
ad_tda_kde_5.40.5_n3_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n3_svm_cf0[["overall"]]["Accuracy"]
# Per-class statistics for the positive class.
ad_tda_kde_5.40.5_n3_svm_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9451187 0.5314626 0.8641351
## Neg Pred Value Precision Recall
## 0.7543754 0.8641351 0.9451187
## F1 Prevalence Detection Rate
## 0.9028145 0.7592138 0.7175471
## Detection Prevalence Balanced Accuracy
## 0.8303645 0.7382906
# Store precision, recall and F1 (elements 5 to 7 of `byClass`).
ad_tda_kde_5.40.5_n3_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_svm_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Fold-wise accuracy difference: `ad_svm_fit_re` minus the n3 TDA-assisted
# SVM's per-fold accuracies, so positive values mean `ad_svm_fit_re` scored
# higher. `ad_svm_fit_re` is defined outside this section (presumably the
# baseline SVM's `$resample["Accuracy"]` - confirm).
# NOTE(review): data-frame subtraction pairs rows by position, and the TDA
# fit's rows are ordered Fold1, Fold3, Fold2; confirm the baseline rows are in
# the same order and that the two models' folds are actually comparable (the
# TDA model was resampled on its own 10351-row training subset).
diff_tda_kde_5.40.5_svm_n3_3_fold<-(ad_svm_fit_re - ad_tda_kde_5.40.5_n3_svm_fit_re)
diff_tda_kde_5.40.5_svm_n3_3_fold
## Accuracy
## 1 0.02998798
## 2 0.02938376
## 3 0.03053546
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The last two arguments (-0.01, 0.01) are the same +/-0.01 bounds passed to
# every test below and to `rope()`; the helper itself is defined elsewhere.
bst_tda_kde_5.40.5_svm.n3_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_svm.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight from the sign test above (0 / 0.75 here).
bst_tda_kde_5.40.5_svm.n3_3_fold_odds.left<-bst_tda_kde_5.40.5_svm.n3_3_fold$probLeft/bst_tda_kde_5.40.5_svm.n3_3_fold$probRight
bst_tda_kde_5.40.5_svm.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_svm.n3_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_svm.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009166667
##
## $winRight
## [1] 0.9908333
# Bayesian Correlated Test
# NOTE(review): the second argument (0.1) is presumably the fold correlation
# rho; for k-fold cross-validation the usual choice is 1/k, which would be 1/3
# for the 3-fold resampling used here - confirm 0.1 is intended.
bct_tda_kde_5.40.5_svm.n3_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_svm.n3_3_fold
## $left
## [1] 4.615823e-05
##
## $rope
## [1] 0.0001386839
##
## $right
## [1] 0.9998152
# Rope Plot
# NOTE(review): `rope()` is applied directly to the three fold differences
# with the same +/-0.01 range; confirm that summarising three raw values this
# way is intended.
plot(rope(diff_tda_kde_5.40.5_svm_n3_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold))
#bf_tda_kde_5.40.5_svm.n3_3_fold
#t_test
# Frequentist one-sample t-test of the three fold differences against 0.
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_svm_n3_3_fold)
## t = 90.105, df = 2, p-value = 0.0001231
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.02853800 0.03140013
## sample estimates:
## mean of x
## 0.02996907
### Test set diff
# Held-out accuracy gap: svm_cf_ov_acc minus the node-3 TDA-assisted SVM's
# test accuracy. The result printed below is a single value, so every test in
# this section is run on one observation.
diff_tda_kde_5.40.5_svm.n3_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n3_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n3_test
## Accuracy
## -0.01852989
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_svm.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n3_test), -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n3_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probLeft / probRight; with probRight equal to 0 (as recorded above) the
# ratio is Inf.
bst_tda_kde_5.40.5_svm.n3_test_odds.left <-
  with(bst_tda_kde_5.40.5_svm.n3_test, probLeft / probRight)
bst_tda_kde_5.40.5_svm.n3_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_svm.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n3_test), -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n3_test
## $winLeft
## [1] 0.5431667
##
## $winRope
## [1] 0.4568333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the recorded result is NA for all three regions, presumably
# because one observation gives no variance estimate -- confirm whether this
# call should be kept for the test-set comparison.
bct_tda_kde_5.40.5_svm.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n3_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# The remaining steps are left commented out for the test-set comparison.
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n3_test))
#BayesFactor
#bf_tda_kde_5.40.5_svm.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n3_test)) #bf_tda_kde_5.40.5_svm.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n3_test))
## Node4
# Train a radial-basis-kernel SVM (caret method "svmRadial") on the node-4
# TDA/KDE subset. Tuning candidates come from svmGrid, resampling is governed
# by fitControl, predictors are centred and scaled first, and the winning
# configuration is chosen by accuracy.
# NOTE(review): `Importance` is not a named train() argument, so it is
# forwarded through `...` to the underlying fit -- confirm the SVM backend
# actually uses it.
# NOTE(review): the recorded warnings below report zero-variance dummy columns
# that cannot be scaled; consider dropping them before training.
Adult_TDA_KDE_5.40.5_n4_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  Importance = TRUE, # TRUE rather than T: T is an ordinary, reassignable variable
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  preProcess = c("center", "scale"), # spelled out; `preProc` relied on partial matching
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Without.pay, V4.10th, V4.11th,
## V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands, V14.Hong, V14.Hungary
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Masters, V4.Preschool,
## V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the tuning summary of the node-4 TDA-assisted SVM.
Adult_TDA_KDE_5.40.5_n4_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 8741 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5828, 5827, 5827
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.8230179 0.2720075106
## 0.1 0.50 0.8311406 0.3489216475
## 0.1 0.75 0.8351447 0.3842342770
## 0.1 1.00 0.8381190 0.4091741614
## 0.1 1.25 0.8388057 0.4208327509
## 1.0 0.25 0.8060863 0.0992490769
## 1.0 0.50 0.8126070 0.1800648190
## 1.0 0.75 0.8171832 0.2342191615
## 1.0 1.00 0.8177554 0.2620286475
## 1.0 1.25 0.8175266 0.2812772185
## 10.0 0.25 0.7970484 0.0008982693
## 10.0 0.50 0.7969338 0.0099492462
## 10.0 0.75 0.7965904 0.0241208494
## 10.0 1.00 0.7979634 0.0552925407
## 10.0 1.25 0.8008236 0.0909170063
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 1.25.
# Per-fold performance of the selected model.
Adult_TDA_KDE_5.40.5_n4_SvmFit0$resample
## Accuracy Kappa Resample
## 1 0.8389976 0.4181952 Fold1
## 2 0.8438572 0.4422786 Fold3
## 3 0.8335621 0.4020245 Fold2
# Keep only the Accuracy column (column 1) as a one-column data frame for the
# fold-wise comparison. The rows are in the order Fold1, Fold3, Fold2 shown
# above, not sorted by fold.
ad_tda_kde_5.40.5_n4_svm_fit_re <-
  Adult_TDA_KDE_5.40.5_n4_SvmFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n4_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.40.5_n4_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n_SvmFit TDA-Assisted Svm")
# Score the held-out test set with the node-4 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the true test labels to assess test performance
ad_tda_kde_5.40.5_n4_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
# Report the node-4 test-set confusion matrix. It is printed once; the
# original printed the identical object twice in a row.
ad_tda_kde_5.40.5_n4_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7135 1536
## >50K 281 816
##
## Accuracy : 0.814
## 95% CI : (0.8061, 0.8217)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.3779
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9621
## Specificity : 0.3469
## Pos Pred Value : 0.8229
## Neg Pred Value : 0.7438
## Prevalence : 0.7592
## Detection Rate : 0.7304
## Detection Prevalence : 0.8877
## Balanced Accuracy : 0.6545
##
## 'Positive' Class : <=50K
##
# Overall statistics as a named numeric vector.
ad_tda_kde_5.40.5_n4_svm_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.139844e-01 3.778921e-01 8.061234e-01 8.216578e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.867829e-39 3.188120e-190
# Keep the test accuracy (the "Accuracy" entry, element 1 of `overall`) for the
# test-set comparison; selecting by name does not depend on element order.
ad_tda_kde_5.40.5_n4_svm_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_svm_cf0$overall["Accuracy"]
# Per-class statistics as a named numeric vector.
ad_tda_kde_5.40.5_n4_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9621090 0.3469388 0.8228578
## Neg Pred Value Precision Recall
## 0.7438469 0.8228578 0.9621090
## F1 Prevalence Detection Rate
## 0.8870517 0.7592138 0.7304464
## Detection Prevalence Balanced Accuracy
## 0.8876945 0.6545239
# Elements 5:7 of `byClass` are Precision, Recall and F1 (see the names printed
# above); select them by name.
ad_tda_kde_5.40.5_n4_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted SVM vs. tda-assisted SVM classifiers
### 3-fold diff
# Row-wise difference of the per-fold CV accuracies: ad_svm_fit_re minus the
# node-4 TDA-assisted SVM. Negative values mean the node-4 model scored higher.
# NOTE(review): rows are paired by position, not by fold label -- confirm both
# resample tables list their folds in the same order.
diff_tda_kde_5.40.5_svm_n4_3_fold <-
  ad_svm_fit_re - ad_tda_kde_5.40.5_n4_svm_fit_re
diff_tda_kde_5.40.5_svm_n4_3_fold
## Accuracy
## 1 -0.01509657
## 2 -0.01853145
## 3 -0.01137206
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The trailing -0.01 / 0.01 are presumably the ROPE bounds (the result has a
# `probRope` element) -- verify against the helper's definition.
bst_tda_kde_5.40.5_svm.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold), -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# probLeft / probRight; with probRight equal to 0 (as recorded above) the
# ratio is Inf.
bst_tda_kde_5.40.5_svm.n4_3_fold_odds.left <-
  with(bst_tda_kde_5.40.5_svm.n4_3_fold, probLeft / probRight)
bst_tda_kde_5.40.5_svm.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_svm.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold), -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n4_3_fold
## $winLeft
## [1] 0.8341333
##
## $winRope
## [1] 0.1658667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the 0.1 is presumably the assumed correlation between folds --
# confirm against the helper's signature.
bct_tda_kde_5.40.5_svm.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n4_3_fold
## $left
## [1] 0.9143909
##
## $rope
## [1] 0.08111185
##
## $right
## [1] 0.00449723
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_svm_n4_3_fold, range = c(-0.01, 0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_svm.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold))
#bf_tda_kde_5.40.5_svm.n4_3_fold
#t_test
# Frequentist one-sample t-test of the fold differences against zero.
t.test(as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_svm_n4_3_fold)
## t = -7.2558, df = 2, p-value = 0.01847
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.023894911 -0.006105149
## sample estimates:
## mean of x
## -0.01500003
### Test set diff
# Held-out accuracy gap: svm_cf_ov_acc minus the node-4 TDA-assisted SVM's
# test accuracy. The result printed below is a single value, so every test in
# this section is run on one observation.
diff_tda_kde_5.40.5_svm.n4_test <-
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n4_svm_cf0_ov_acc
diff_tda_kde_5.40.5_svm.n4_test
## Accuracy
## 0.01300164
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_svm.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n4_test), -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight of the sign-test result above.
bst_tda_kde_5.40.5_svm.n4_test_odds.left <-
  with(bst_tda_kde_5.40.5_svm.n4_test, probLeft / probRight)
bst_tda_kde_5.40.5_svm.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_svm.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n4_test), -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4563
##
## $winRight
## [1] 0.5437
# Bayesian Correlated Test
# NOTE(review): the recorded result is NA for all three regions, presumably
# because one observation gives no variance estimate -- confirm whether this
# call should be kept for the test-set comparison.
bct_tda_kde_5.40.5_svm.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n4_test), 0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# The remaining steps are left commented out for the test-set comparison.
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n4_test))
#BayesFactor
#bf_tda_kde_5.40.5_svm.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n4_test)) #bf_tda_kde_5.40.5_svm.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n4_test))
##Node5
# Fit the node-5 TDA-assisted SVM (radial basis kernel) with caret::train().
# Tuning candidates come from svmGrid, resampling is controlled by fitControl,
# predictors are centred and scaled, and the best model is chosen by accuracy.
# NOTE(review): this is the Node5 model but `data` is the `.n3.vec` object —
# confirm this is the intended node-5 training set and not a copy-paste slip.
# NOTE(review): `Importance` is forwarded through train()'s `...`; confirm the
# underlying SVM fitter actually uses it.
Adult_TDA_KDE_5.40.5_n5_SvmFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  # TRUE spelled out: `T` is an ordinary variable that can be reassigned
  Importance = TRUE,
  method = "svmRadial",
  trControl = fitControl,
  tuneGrid = svmGrid,
  # full argument name instead of relying on partial matching of `preProc`
  preProcess = c("center", "scale"),
  metric = "Accuracy"
)
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V2.Never.worked, V2.Without.pay,
## V4.10th, V4.11th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate,
## V4.Preschool, V4.Prof.school, V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
## Warning in preProcess.default(thresh = 0.95, k = 5, freqCut = 19, uniqueCut =
## 10, : These variables have zero variances: V4.10th, V4.11th, V4.1st.4th,
## V4.5th.6th, V4.7th.8th, V4.9th, V4.Doctorate, V4.Preschool, V4.Prof.school,
## V14.Holand.Netherlands
## Warning in .local(x, ...): Variable(s) `' constant. Cannot scale data.
# Print the fitted model: resampled accuracy/kappa for each sigma/C pair and
# the tuning values selected for the final model.
Adult_TDA_KDE_5.40.5_n5_SvmFit0
## Support Vector Machines with Radial Basis Function Kernel
##
## 10351 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## Pre-processing: centered (108), scaled (108)
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6901, 6900, 6901
## Resampling results across tuning parameters:
##
## sigma C Accuracy Kappa
## 0.1 0.25 0.7834030 0.377727295
## 0.1 0.50 0.7966385 0.441966977
## 0.1 0.75 0.7988604 0.458211743
## 0.1 1.00 0.7988602 0.465076255
## 0.1 1.25 0.7983771 0.467523999
## 1.0 0.25 0.7439856 0.170709502
## 1.0 0.50 0.7564482 0.245145274
## 1.0 0.75 0.7643703 0.294259198
## 1.0 1.00 0.7670755 0.320583744
## 1.0 1.25 0.7663992 0.329880049
## 10.0 0.25 0.7147135 0.001256598
## 10.0 0.50 0.7179982 0.026031011
## 10.0 0.75 0.7242777 0.067356230
## 10.0 1.00 0.7276590 0.097153223
## 10.0 1.25 0.7293979 0.117707166
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were sigma = 0.1 and C = 0.75.
# Per-fold accuracy and kappa of the selected model
Adult_TDA_KDE_5.40.5_n5_SvmFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8014493 0.4641849 Fold1
## 2 0.7945523 0.4409574 Fold2
## 3 0.8005797 0.4694929 Fold3
# Keep the per-fold accuracy of the node-5 fit as a one-column data frame.
# The column is selected by name instead of position so the result does not
# depend on the column order of `resample`.
ad_tda_kde_5.40.5_n5_svm_fit_re <- Adult_TDA_KDE_5.40.5_n5_SvmFit0$resample["Accuracy"]
# Summary of the fitted train object
summary(Adult_TDA_KDE_5.40.5_n5_SvmFit0)
## Length Class Mode
## 1 ksvm S4
#vip(Adult_TDA_KDE_5.40.5_n5_SvmFit0,25) + ggtitle("Adult_TDA_KDE_5.40.5_n5_SvmFit TDA-Assisted Svm")
# Predict outcomes for the test data using Adult_TDA_KDE_5.40.5_n5_SvmFit0 (fitted on the training data)
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_SvmFit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the observed test labels
ad_tda_kde_5.40.5_n5_svm_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7057 1229
## >50K 359 1123
##
## Accuracy : 0.8374
## 95% CI : (0.83, 0.8447)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4911
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9516
## Specificity : 0.4775
## Pos Pred Value : 0.8517
## Neg Pred Value : 0.7578
## Prevalence : 0.7592
## Detection Rate : 0.7225
## Detection Prevalence : 0.8483
## Balanced Accuracy : 0.7145
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object was
# already printed right after it was created, so this repeats that output.
ad_tda_kde_5.40.5_n5_svm_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7057 1229
## >50K 359 1123
##
## Accuracy : 0.8374
## 95% CI : (0.83, 0.8447)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.4911
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9516
## Specificity : 0.4775
## Pos Pred Value : 0.8517
## Neg Pred Value : 0.7578
## Prevalence : 0.7592
## Detection Rate : 0.7225
## Detection Prevalence : 0.8483
## Balanced Accuracy : 0.7145
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix (accuracy, kappa, bounds, p-values)
ad_tda_kde_5.40.5_n5_svm_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.374283e-01 4.910761e-01 8.299596e-01 8.446953e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.017543e-79 1.994170e-105
# Keep the overall test-set accuracy, selected by name rather than by its
# position in the `overall` vector.
ad_tda_kde_5.40.5_n5_svm_cf0_ov_acc <- ad_tda_kde_5.40.5_n5_svm_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...)
ad_tda_kde_5.40.5_n5_svm_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9515912 0.4774660 0.8516775
## Neg Pred Value Precision Recall
## 0.7577598 0.8516775 0.9515912
## F1 Prevalence Detection Rate
## 0.8988664 0.7592138 0.7224611
## Detection Prevalence Balanced Accuracy
## 0.8482801 0.7145286
# Keep precision, recall and F1, selected by name instead of the positional
# range 5:7 so the result does not depend on the ordering of `byClass`.
ad_tda_kde_5.40.5_n5_svm_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_svm_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted SVM vs. TDA-assisted SVM classifiers
### 3-fold diff
# Per-fold accuracy in ad_svm_fit_re minus the per-fold accuracy of the node-5
# TDA-assisted SVM.
diff_tda_kde_5.40.5_svm_n5_3_fold <- ad_svm_fit_re - ad_tda_kde_5.40.5_n5_svm_fit_re
diff_tda_kde_5.40.5_svm_n5_3_fold
## Accuracy
## 1 0.02245175
## 2 0.03077348
## 3 0.02161034
## Bayesian Tests 3-fold diff
# Bayesian sign test on the node-5 per-fold differences, with bounds -0.01 and
# 0.01 (presumably the ROPE limits — confirm against BayesianSignTest).
bst_tda_kde_5.40.5_svm.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold),
  -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n5_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds left for the Bayesian sign test: probLeft divided by probRight
bst_tda_kde_5.40.5_svm.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_svm.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_svm.n5_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the node-5 per-fold differences, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.40.5_svm.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold),
  -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n5_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.009633333
##
## $winRight
## [1] 0.9903667
# Correlated Bayesian t-test on the node-5 per-fold accuracy differences.
# The positional arguments 0.1, -0.01, 0.01 are passed through unchanged
# (presumably the fold correlation and the ROPE limits — confirm against the
# definition of correlatedBayesianTtest).
bct_tda_kde_5.40.5_svm.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n5_3_fold
## $left
## [1] 0.004603968
##
## $rope
## [1] 0.01911759
##
## $right
## [1] 0.9762784
# ROPE plot of the node-5 per-fold accuracy differences over [-0.01, 0.01]
plot(rope(diff_tda_kde_5.40.5_svm_n5_3_fold, range = c(-0.01, 0.01)))

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_svm.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold))
#bf_tda_kde_5.40.5_svm.n5_3_fold
# One-sample t-test of the same differences
t.test(x = as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_svm_n5_3_fold)
## t = 8.5305, df = 2, p-value = 0.01347
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.01236316 0.03752723
## sample estimates:
## mean of x
## 0.02494519
### Test set diff
# svm_cf_ov_acc (presumably the non-TDA SVM test accuracy — confirm) minus the
# node-5 TDA-assisted SVM test accuracy.
diff_tda_kde_5.40.5_svm.n5_test <- (
  svm_cf_ov_acc - ad_tda_kde_5.40.5_n5_svm_cf0_ov_acc
)
diff_tda_kde_5.40.5_svm.n5_test
## Accuracy
## -0.01044226
## Bayesian Tests Test set diff
# Bayesian sign test on the node-5 test-set difference, with bounds -0.01 and
# 0.01 (presumably the ROPE limits — confirm against BayesianSignTest).
bst_tda_kde_5.40.5_svm.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_svm.n5_test),
  -0.01, 0.01
)
bst_tda_kde_5.40.5_svm.n5_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds left for the Bayesian sign test: probLeft divided by probRight.
# A probRight of zero makes this ratio Inf.
bst_tda_kde_5.40.5_svm.n5_test_odds.left <- with(
  bst_tda_kde_5.40.5_svm.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_svm.n5_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the node-5 test-set difference, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_kde_5.40.5_svm.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_svm.n5_test),
  -0.01, 0.01
)
bsr_tda_kde_5.40.5_svm.n5_test
## $winLeft
## [1] 0.5395333
##
## $winRope
## [1] 0.4604667
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the node-5 test-set difference.
# NOTE(review): the input here is a single difference and the printed result
# is NA for left/rope/right — presumably because one observation has no
# sample variance; confirm whether this call is meaningful for the test set.
bct_tda_kde_5.40.5_svm.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_svm.n5_test),
  0.1, -0.01, 0.01
)
bct_tda_kde_5.40.5_svm.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_svm.n5_test))
#BayesFactor
#bf_tda_kde_5.40.5_svm.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_svm.n5_test))
#bf_tda_kde_5.40.5_svm.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_svm.n5_test))
#Non-TDA-Assisted
# Tuning grid for the neural network: every combination of hidden-layer size
# and weight decay (4 x 3 = 12 candidates).
nn1Grid <- expand.grid(
  size = c(2, 3, 5, 7),
  decay = c(0.3, 0.5, 0.7)
)
#Neural Network
# Fit the non-TDA-assisted neural network (method "nnet") on the one-hot
# encoded training data with caret::train(). Tuning candidates come from
# nn1Grid, resampling is controlled by fitControl, and the best model is
# chosen by accuracy.
# NOTE(review): `Importance` is forwarded through train()'s `...`; confirm the
# underlying nnet fitter actually uses it.
adultNn1Fit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  # TRUE spelled out: `T` is an ordinary variable that can be reassigned
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 11629.741848
## iter 10 value 8404.633624
## iter 20 value 8388.183097
## iter 30 value 7855.269958
## iter 40 value 7767.391165
## iter 50 value 7699.378577
## iter 60 value 7625.326110
## iter 70 value 7548.196492
## iter 80 value 7496.139199
## iter 90 value 7437.034038
## iter 100 value 7032.426324
## final value 7032.426324
## stopped after 100 iterations
## # weights: 331
## initial value 9617.027718
## iter 10 value 8226.246530
## iter 20 value 7945.275879
## iter 30 value 7787.313967
## iter 40 value 7786.735790
## iter 50 value 7772.786512
## iter 60 value 7766.331137
## iter 70 value 7761.640925
## iter 80 value 7756.551714
## iter 90 value 7743.855341
## iter 100 value 7571.852751
## final value 7571.852751
## stopped after 100 iterations
## # weights: 551
## initial value 17404.802175
## iter 10 value 7852.363583
## iter 20 value 7710.813019
## iter 30 value 7574.318909
## iter 40 value 7567.511013
## final value 7562.886549
## converged
## # weights: 771
## initial value 8637.960391
## iter 10 value 7793.980609
## iter 20 value 7762.128399
## iter 30 value 7753.531953
## iter 40 value 7606.087030
## iter 50 value 7470.598248
## iter 60 value 7192.097323
## iter 70 value 6128.153922
## iter 80 value 5416.556859
## iter 90 value 5337.424079
## iter 100 value 5245.759319
## final value 5245.759319
## stopped after 100 iterations
## # weights: 221
## initial value 12473.916184
## iter 10 value 7817.841489
## iter 20 value 7663.791179
## iter 30 value 7545.517018
## iter 40 value 7131.514356
## iter 50 value 6389.955849
## iter 60 value 6146.064097
## iter 70 value 6046.621048
## iter 80 value 5776.427856
## iter 90 value 5172.590943
## iter 100 value 4861.025334
## final value 4861.025334
## stopped after 100 iterations
## # weights: 331
## initial value 10057.242945
## iter 10 value 7794.531043
## iter 20 value 7724.291615
## iter 30 value 7635.564786
## iter 40 value 7541.821329
## iter 50 value 7386.157301
## iter 60 value 7090.283971
## iter 70 value 6640.344493
## iter 80 value 6302.815978
## iter 90 value 5921.153938
## iter 100 value 5396.223730
## final value 5396.223730
## stopped after 100 iterations
## # weights: 551
## initial value 8554.938739
## iter 10 value 8327.545112
## iter 20 value 7811.830943
## iter 30 value 7732.170574
## iter 40 value 7678.131061
## iter 50 value 7641.448376
## iter 60 value 7578.609440
## iter 70 value 7531.663357
## iter 80 value 7363.672633
## iter 90 value 7163.632624
## iter 100 value 6579.085908
## final value 6579.085908
## stopped after 100 iterations
## # weights: 771
## initial value 8554.093702
## iter 10 value 8367.755536
## iter 20 value 8230.673865
## iter 30 value 7811.841864
## iter 40 value 7761.912442
## iter 50 value 7748.513221
## iter 60 value 7729.517835
## iter 70 value 7630.898565
## iter 80 value 7562.774584
## iter 90 value 7517.735599
## iter 100 value 7462.030694
## final value 7462.030694
## stopped after 100 iterations
## # weights: 221
## initial value 9291.600833
## iter 10 value 8367.661767
## iter 20 value 7778.924219
## iter 30 value 7764.922158
## iter 40 value 7758.759573
## iter 50 value 7757.261349
## iter 60 value 7753.229296
## iter 70 value 7597.879121
## iter 80 value 6950.157587
## iter 90 value 6630.883136
## iter 100 value 6395.412937
## final value 6395.412937
## stopped after 100 iterations
## # weights: 331
## initial value 8588.096604
## iter 10 value 8337.094544
## iter 20 value 7950.522211
## iter 30 value 7917.620212
## iter 40 value 7754.734913
## iter 50 value 7741.843526
## iter 60 value 7737.005111
## iter 70 value 7694.533294
## iter 80 value 7616.368507
## iter 90 value 7580.772246
## iter 100 value 7529.368489
## final value 7529.368489
## stopped after 100 iterations
## # weights: 551
## initial value 16411.432178
## iter 10 value 8233.252963
## iter 20 value 7909.276011
## iter 30 value 7790.411320
## iter 40 value 7778.469722
## iter 50 value 7742.951602
## iter 60 value 7718.946054
## iter 70 value 7649.591064
## iter 80 value 7539.163996
## iter 90 value 7279.465327
## iter 100 value 7002.242162
## final value 7002.242162
## stopped after 100 iterations
## # weights: 771
## initial value 9106.391912
## iter 10 value 7820.454887
## iter 20 value 7797.090646
## iter 30 value 7710.854306
## iter 40 value 7670.835505
## iter 50 value 7663.182137
## iter 60 value 7649.622968
## iter 70 value 7479.348118
## iter 80 value 7419.749332
## iter 90 value 7317.350304
## iter 100 value 7258.364718
## final value 7258.364718
## stopped after 100 iterations
## # weights: 221
## initial value 9610.156462
## iter 10 value 7777.692025
## iter 20 value 7750.966723
## iter 30 value 7729.098034
## iter 40 value 7724.805424
## iter 50 value 7633.047425
## iter 60 value 7577.524089
## iter 70 value 7521.032220
## iter 80 value 7459.905675
## iter 90 value 7055.410753
## iter 100 value 6326.634920
## final value 6326.634920
## stopped after 100 iterations
## # weights: 331
## initial value 12558.499576
## iter 10 value 8008.913934
## iter 20 value 7648.552327
## iter 30 value 7619.614699
## iter 40 value 7524.924373
## iter 50 value 7503.170436
## iter 60 value 7481.133194
## iter 70 value 7458.891382
## iter 80 value 7328.010216
## iter 90 value 7156.356219
## iter 100 value 6692.241542
## final value 6692.241542
## stopped after 100 iterations
## # weights: 551
## initial value 12380.225994
## iter 10 value 8330.717080
## iter 20 value 7694.428585
## iter 30 value 7642.208249
## iter 40 value 7600.030144
## iter 50 value 7584.819298
## iter 60 value 7516.492226
## iter 70 value 7421.911817
## iter 80 value 7274.808269
## iter 90 value 7159.658453
## iter 100 value 7046.420660
## final value 7046.420660
## stopped after 100 iterations
## # weights: 771
## initial value 13826.246329
## iter 10 value 8302.232523
## iter 20 value 7783.837203
## iter 30 value 7629.959015
## iter 40 value 7544.817231
## iter 50 value 7504.910246
## iter 60 value 7438.365335
## iter 70 value 7375.958992
## iter 80 value 7260.039222
## iter 90 value 7197.944703
## iter 100 value 7093.593100
## final value 7093.593100
## stopped after 100 iterations
## # weights: 221
## initial value 10263.335172
## iter 10 value 8099.749382
## iter 20 value 7732.083237
## iter 30 value 7727.159475
## iter 40 value 7725.351145
## iter 50 value 7579.836736
## iter 60 value 7456.055274
## iter 70 value 7376.318677
## iter 80 value 7216.584612
## iter 90 value 6727.779988
## iter 100 value 6573.321377
## final value 6573.321377
## stopped after 100 iterations
## # weights: 331
## initial value 18598.467508
## iter 10 value 7805.611217
## iter 20 value 7674.429830
## iter 30 value 7358.723049
## iter 40 value 6659.905358
## iter 50 value 6481.701448
## iter 60 value 5615.373567
## iter 70 value 5343.977708
## iter 80 value 5095.989787
## iter 90 value 4920.588587
## iter 100 value 4854.556482
## final value 4854.556482
## stopped after 100 iterations
## # weights: 551
## initial value 9554.597505
## iter 10 value 8006.342771
## iter 20 value 7731.092779
## iter 30 value 7616.209839
## iter 40 value 7563.661869
## iter 50 value 7557.079966
## iter 60 value 7555.398224
## iter 70 value 7555.100036
## iter 80 value 7414.717845
## iter 90 value 7276.553457
## iter 100 value 6596.539808
## final value 6596.539808
## stopped after 100 iterations
## # weights: 771
## initial value 9678.867457
## iter 10 value 8285.962852
## iter 20 value 7757.106135
## iter 30 value 7721.907600
## iter 40 value 7708.298546
## iter 50 value 7692.119572
## iter 60 value 7543.519797
## iter 70 value 7491.609604
## iter 80 value 7460.221053
## iter 90 value 7447.108429
## iter 100 value 7430.751410
## final value 7430.751410
## stopped after 100 iterations
## # weights: 221
## initial value 9042.892929
## iter 10 value 8361.529468
## iter 20 value 7796.041553
## iter 30 value 7695.816636
## iter 40 value 7584.217177
## iter 50 value 7581.378662
## iter 60 value 7551.814987
## iter 70 value 7536.800358
## iter 80 value 7518.963273
## iter 90 value 7490.596810
## iter 100 value 7445.584911
## final value 7445.584911
## stopped after 100 iterations
## # weights: 331
## initial value 8465.620235
## iter 10 value 7861.684726
## iter 20 value 7737.750755
## iter 30 value 7687.522558
## iter 40 value 7683.602069
## iter 50 value 7669.238169
## iter 60 value 7638.726016
## iter 70 value 7433.825827
## iter 80 value 6854.521323
## iter 90 value 6025.900778
## iter 100 value 5356.132255
## final value 5356.132255
## stopped after 100 iterations
## # weights: 551
## initial value 20850.731604
## iter 10 value 8264.244072
## iter 20 value 7729.178744
## iter 30 value 7710.808875
## iter 40 value 7632.348546
## iter 50 value 7591.523351
## iter 60 value 7533.278425
## iter 70 value 7352.848851
## iter 80 value 7206.304109
## iter 90 value 6714.379139
## iter 100 value 6110.177830
## final value 6110.177830
## stopped after 100 iterations
## # weights: 771
## initial value 9122.089677
## iter 10 value 8335.713744
## iter 20 value 7747.778253
## iter 30 value 7656.764533
## iter 40 value 7548.455025
## iter 50 value 7525.584014
## iter 60 value 7480.014032
## iter 70 value 7361.175370
## iter 80 value 6996.872457
## iter 90 value 6379.834448
## iter 100 value 5606.125951
## final value 5606.125951
## stopped after 100 iterations
## # weights: 221
## initial value 9613.221925
## final value 8389.316789
## converged
## # weights: 331
## initial value 17225.352190
## iter 10 value 7909.926849
## iter 20 value 7900.912417
## iter 30 value 7751.510095
## iter 40 value 7732.869739
## iter 50 value 7676.818554
## iter 60 value 7564.248472
## iter 70 value 7550.319442
## iter 80 value 7550.250664
## iter 90 value 7548.785735
## iter 100 value 7542.281246
## final value 7542.281246
## stopped after 100 iterations
## # weights: 551
## initial value 9767.278558
## iter 10 value 8221.098929
## iter 20 value 7699.981749
## iter 30 value 7697.199372
## iter 40 value 7670.696919
## iter 50 value 7657.530851
## iter 60 value 7619.676272
## iter 70 value 7404.564385
## iter 80 value 7385.851065
## iter 90 value 7108.911593
## iter 100 value 6717.570990
## final value 6717.570990
## stopped after 100 iterations
## # weights: 771
## initial value 11207.863505
## iter 10 value 7808.832604
## iter 20 value 7744.220896
## iter 30 value 7739.299506
## iter 40 value 7734.893387
## iter 50 value 7730.663360
## iter 60 value 7710.062795
## iter 70 value 7688.368847
## iter 80 value 7634.430710
## iter 90 value 7485.435662
## iter 100 value 7452.444331
## final value 7452.444331
## stopped after 100 iterations
## # weights: 221
## initial value 8571.871616
## iter 10 value 8390.760363
## iter 20 value 8389.355198
## iter 30 value 8389.338858
## iter 30 value 8389.338790
## iter 30 value 8389.338724
## final value 8389.338724
## converged
## # weights: 331
## initial value 18374.046492
## iter 10 value 8390.539328
## iter 20 value 8389.357308
## iter 30 value 8389.338840
## iter 30 value 8389.338778
## iter 30 value 8389.338760
## final value 8389.338760
## converged
## # weights: 551
## initial value 8902.541198
## iter 10 value 8345.608310
## iter 20 value 7844.059793
## iter 30 value 7778.170863
## iter 40 value 6956.605429
## iter 50 value 6067.809074
## iter 60 value 5718.071850
## iter 70 value 5369.389992
## iter 80 value 5189.885666
## iter 90 value 5004.665450
## iter 100 value 4937.034564
## final value 4937.034564
## stopped after 100 iterations
## # weights: 771
## initial value 12523.715791
## iter 10 value 8176.208141
## iter 20 value 7740.916425
## iter 30 value 7726.479510
## iter 40 value 7715.584892
## iter 50 value 7678.202144
## iter 60 value 7632.786379
## iter 70 value 7570.873765
## iter 80 value 7544.762233
## iter 90 value 7469.122873
## iter 100 value 7465.773503
## final value 7465.773503
## stopped after 100 iterations
## # weights: 221
## initial value 8398.848615
## iter 10 value 7762.582119
## iter 20 value 7672.792303
## iter 30 value 7519.264416
## iter 40 value 7436.261325
## iter 50 value 7387.471742
## iter 60 value 6509.652314
## iter 70 value 5855.832568
## iter 80 value 5494.715972
## iter 90 value 5258.104011
## iter 100 value 5113.593139
## final value 5113.593139
## stopped after 100 iterations
## # weights: 331
## initial value 9068.512789
## iter 10 value 8389.433189
## final value 8389.426830
## converged
## # weights: 551
## initial value 8429.860299
## iter 10 value 8096.097339
## iter 20 value 7751.620165
## iter 30 value 7677.004000
## iter 40 value 7629.781811
## iter 50 value 7583.169765
## iter 60 value 7542.074893
## iter 70 value 7411.631706
## iter 80 value 7370.989789
## iter 90 value 7347.794903
## iter 100 value 7215.506312
## final value 7215.506312
## stopped after 100 iterations
## # weights: 771
## initial value 8987.929263
## iter 10 value 7786.198804
## iter 20 value 7773.766467
## iter 30 value 7699.864446
## iter 40 value 7660.231585
## iter 50 value 7554.419407
## iter 60 value 7533.095787
## iter 70 value 7464.242833
## iter 80 value 7437.318014
## iter 90 value 7424.675389
## iter 100 value 7421.365856
## final value 7421.365856
## stopped after 100 iterations
## # weights: 221
## initial value 15157.138791
## iter 10 value 12584.329249
## final value 12582.716271
## converged
# Display the tuning/resampling results of the baseline (non-TDA) network.
adultNn1Fit
## Neural Network
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15195, 15196
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7919084 0.2951526
## 2 0.5 0.8008142 0.2630162
## 2 0.7 0.8199899 0.4250339
## 3 0.3 0.7956391 0.3331653
## 3 0.5 0.8195472 0.3758177
## 3 0.7 0.8025252 0.2665166
## 5 0.3 0.7952438 0.2718914
## 5 0.5 0.8195511 0.4093591
## 5 0.7 0.8016057 0.4044819
## 7 0.3 0.8144159 0.3853184
## 7 0.5 0.7980080 0.2762891
## 7 0.7 0.8102920 0.3821858
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
# Per-fold accuracy and kappa recorded for the selected baseline network.
adultNn1Fit[["resample"]]
## Accuracy Kappa Resample
## 1 0.8499408 0.5788401 Fold3
## 2 0.7929718 0.3048361 Fold2
## 3 0.8170571 0.3914254 Fold1
# Keep just the accuracy column (still a one-column data frame) so it can be
# subtracted fold-by-fold from the TDA-assisted results later on.
ad_nn1_fit_re <- adultNn1Fit[["resample"]]["Accuracy"]
# Print the weights of the final fitted network.
summary(adultNn1Fit)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o
## -0.57 0.00 -0.57
# Variable-importance plot for the baseline network, limited to 25 features.
# The plot title previously misspelled "Assisted" as "Assited".
vip(adultNn1Fit, num_features = 25) + ggtitle("non-TDA-Assisted NN")

# Score the held-out test set with the baseline network.
predictions <- predict(adultNn1Fit, newdata = adult.one_hot_df4Test)
# Cross-tabulate the predicted classes against the observed test labels.
nn1_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nn1_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the baseline confusion matrix.
nn1_cf[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Overall accuracy, kept as a named length-one vector.
nn1_cf_ov_acc <- nn1_cf[["overall"]]["Accuracy"]
# Per-class statistics (computed for the 'positive' class).
nn1_cf[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
nn1_cf_pre_rec_f1 <- nn1_cf[["byClass"]][c("Precision", "Recall", "F1")]
## With TDA PCA filter 5 intervals, 50% overlap, 5 bins
## Node1
## NOTE(review): the object names carry "5.40.5" while this header says 50%
## overlap -- confirm which overlap value was actually used.
# Neural Network 1: tune an nnet classifier on the node-1 data, using the
# resampling scheme in `fitControl` and the candidate grid in `nn1Grid`
# (both defined outside this section), selecting the model by accuracy.
# NOTE(review): `Importance` is not among nnet's documented arguments and is
# presumably absorbed by `...`; it is kept so the call is otherwise unchanged.
Adult_TDA_PC_5.40.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n1.vec,
  Importance = TRUE, # was `T`, which is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 837.414238
## iter 10 value 103.737182
## iter 20 value 102.458842
## iter 30 value 101.774210
## iter 40 value 101.205567
## iter 50 value 101.090879
## iter 60 value 101.040971
## iter 70 value 101.010415
## final value 101.010412
## converged
## # weights: 331
## initial value 955.671454
## iter 10 value 110.207267
## iter 20 value 106.876041
## iter 30 value 103.439238
## iter 40 value 102.311889
## iter 50 value 101.743073
## iter 60 value 101.326195
## iter 70 value 101.082315
## iter 80 value 100.936649
## iter 90 value 100.676155
## iter 100 value 100.600236
## final value 100.600236
## stopped after 100 iterations
## # weights: 551
## initial value 1152.397778
## iter 10 value 120.126095
## iter 20 value 102.061519
## iter 30 value 101.900168
## iter 40 value 101.235515
## iter 50 value 100.599606
## iter 60 value 100.579420
## iter 70 value 100.359751
## iter 80 value 100.305178
## iter 90 value 100.293545
## iter 100 value 100.293154
## final value 100.293154
## stopped after 100 iterations
## # weights: 771
## initial value 2564.700605
## iter 10 value 108.886670
## iter 20 value 107.751749
## iter 30 value 107.597436
## iter 40 value 107.205297
## iter 50 value 107.067517
## iter 60 value 106.752081
## iter 70 value 106.668090
## iter 80 value 106.325166
## iter 90 value 100.056543
## iter 100 value 98.881307
## final value 98.881307
## stopped after 100 iterations
## # weights: 221
## initial value 1518.400930
## iter 10 value 114.467111
## iter 20 value 105.990971
## iter 30 value 105.639716
## iter 40 value 102.864557
## iter 50 value 102.682544
## iter 60 value 102.679633
## final value 102.679433
## converged
## # weights: 331
## initial value 1805.136576
## iter 10 value 117.247285
## iter 20 value 115.857565
## iter 30 value 104.826760
## iter 40 value 103.892989
## iter 50 value 103.860592
## iter 60 value 103.423156
## iter 70 value 102.226602
## iter 80 value 102.031741
## iter 90 value 102.022828
## iter 100 value 101.959261
## final value 101.959261
## stopped after 100 iterations
## # weights: 551
## initial value 3714.982633
## iter 10 value 111.367283
## iter 20 value 107.807802
## iter 30 value 103.105630
## iter 40 value 101.936700
## iter 50 value 101.936508
## final value 101.936463
## converged
## # weights: 771
## initial value 1587.128988
## iter 10 value 106.276624
## iter 20 value 103.096019
## iter 30 value 102.346959
## iter 40 value 101.792973
## iter 50 value 101.659114
## iter 60 value 101.658714
## iter 70 value 101.646745
## iter 80 value 101.450882
## iter 90 value 101.450503
## iter 100 value 101.450165
## final value 101.450165
## stopped after 100 iterations
## # weights: 221
## initial value 1126.411930
## iter 10 value 106.991142
## iter 20 value 105.538130
## iter 30 value 105.396706
## iter 40 value 105.379993
## final value 105.379836
## converged
## # weights: 331
## initial value 1284.664180
## iter 10 value 107.364980
## iter 20 value 105.779124
## iter 30 value 105.495241
## iter 40 value 105.369487
## iter 50 value 104.262306
## iter 60 value 103.113124
## iter 70 value 101.574485
## iter 80 value 100.850194
## iter 90 value 100.757096
## iter 100 value 100.729303
## final value 100.729303
## stopped after 100 iterations
## # weights: 551
## initial value 1345.748224
## iter 10 value 128.611918
## iter 20 value 108.625963
## iter 30 value 103.916031
## iter 40 value 103.322253
## iter 50 value 102.114127
## iter 60 value 102.043037
## iter 70 value 102.021153
## iter 80 value 102.018748
## final value 102.018426
## converged
## # weights: 771
## initial value 2954.630951
## iter 10 value 135.183190
## iter 20 value 113.917598
## iter 30 value 102.607083
## iter 40 value 102.409326
## iter 50 value 102.376939
## iter 60 value 101.229898
## iter 70 value 97.975112
## iter 80 value 97.634486
## iter 90 value 97.248608
## iter 100 value 96.848272
## final value 96.848272
## stopped after 100 iterations
## # weights: 221
## initial value 1324.355656
## iter 10 value 114.020386
## iter 20 value 106.782381
## iter 30 value 102.933330
## iter 40 value 102.719570
## iter 50 value 102.702615
## iter 60 value 102.701317
## iter 70 value 102.649590
## iter 80 value 101.161637
## iter 90 value 100.628071
## iter 100 value 97.923546
## final value 97.923546
## stopped after 100 iterations
## # weights: 331
## initial value 635.770654
## iter 10 value 108.579383
## iter 20 value 101.971074
## iter 30 value 101.433644
## iter 40 value 101.156302
## iter 50 value 100.157790
## iter 60 value 98.922572
## iter 70 value 96.410758
## iter 80 value 93.647664
## iter 90 value 91.096687
## iter 100 value 80.914526
## final value 80.914526
## stopped after 100 iterations
## # weights: 551
## initial value 3515.505335
## iter 10 value 264.456468
## iter 20 value 107.647054
## iter 30 value 101.771916
## iter 40 value 101.721508
## iter 50 value 101.645958
## iter 60 value 99.954168
## iter 70 value 95.862133
## iter 80 value 91.940617
## iter 90 value 85.688060
## iter 100 value 75.200350
## final value 75.200350
## stopped after 100 iterations
## # weights: 771
## initial value 2098.950313
## iter 10 value 109.947153
## iter 20 value 105.439669
## iter 30 value 104.001946
## iter 40 value 102.304321
## iter 50 value 100.464593
## iter 60 value 100.266391
## iter 70 value 100.227342
## iter 80 value 99.749155
## iter 90 value 92.229742
## iter 100 value 79.905920
## final value 79.905920
## stopped after 100 iterations
## # weights: 221
## initial value 916.940332
## iter 10 value 120.746402
## iter 20 value 105.883426
## iter 30 value 105.873279
## iter 40 value 105.871903
## iter 50 value 105.773813
## iter 60 value 105.642152
## iter 70 value 104.276453
## iter 80 value 102.190223
## iter 90 value 90.140219
## iter 100 value 84.688957
## final value 84.688957
## stopped after 100 iterations
## # weights: 331
## initial value 2429.776282
## iter 10 value 107.174018
## iter 20 value 106.562223
## iter 30 value 103.963901
## iter 40 value 103.140708
## iter 50 value 102.939349
## iter 60 value 102.916899
## iter 70 value 102.912359
## final value 102.912295
## converged
## # weights: 551
## initial value 1371.765063
## iter 10 value 109.196898
## iter 20 value 107.454391
## iter 30 value 103.505350
## iter 40 value 100.402249
## iter 50 value 99.285273
## iter 60 value 97.473061
## iter 70 value 94.914944
## iter 80 value 94.370787
## iter 90 value 92.912581
## iter 100 value 87.030523
## final value 87.030523
## stopped after 100 iterations
## # weights: 771
## initial value 1601.927662
## iter 10 value 107.331251
## iter 20 value 103.986272
## iter 30 value 101.910374
## iter 40 value 101.772244
## iter 50 value 101.460754
## iter 60 value 100.519089
## iter 70 value 99.040983
## iter 80 value 96.939028
## iter 90 value 95.214478
## iter 100 value 90.455163
## final value 90.455163
## stopped after 100 iterations
## # weights: 221
## initial value 579.438455
## iter 10 value 130.873640
## iter 20 value 105.392012
## iter 30 value 105.390702
## iter 40 value 105.388549
## final value 105.388423
## converged
## # weights: 331
## initial value 1430.313237
## iter 10 value 106.550253
## iter 20 value 104.473659
## iter 30 value 104.065273
## iter 40 value 104.062359
## iter 50 value 104.062179
## final value 104.062173
## converged
## # weights: 551
## initial value 1315.360471
## iter 10 value 110.479699
## iter 20 value 104.120376
## iter 30 value 104.009507
## iter 40 value 103.939309
## iter 50 value 102.987919
## iter 60 value 102.730819
## iter 70 value 102.485352
## iter 80 value 101.562792
## iter 90 value 98.741759
## iter 100 value 94.929681
## final value 94.929681
## stopped after 100 iterations
## # weights: 771
## initial value 1099.917191
## iter 10 value 107.821940
## iter 20 value 106.846983
## iter 30 value 102.715150
## iter 40 value 102.112818
## iter 50 value 100.777573
## iter 60 value 98.930876
## iter 70 value 98.647621
## iter 80 value 98.364021
## iter 90 value 97.314394
## iter 100 value 92.744542
## final value 92.744542
## stopped after 100 iterations
## # weights: 221
## initial value 1759.907618
## iter 10 value 125.824731
## iter 20 value 113.772065
## iter 30 value 108.526773
## iter 40 value 108.203546
## iter 50 value 108.198743
## iter 60 value 108.195749
## final value 108.195483
## converged
## # weights: 331
## initial value 1290.108643
## iter 10 value 110.637738
## iter 20 value 107.803030
## iter 30 value 107.735075
## iter 40 value 106.564554
## iter 50 value 106.563919
## iter 60 value 106.563127
## iter 70 value 106.223955
## iter 80 value 104.969804
## iter 90 value 103.257492
## iter 100 value 102.482960
## final value 102.482960
## stopped after 100 iterations
## # weights: 551
## initial value 2123.086282
## iter 10 value 192.810402
## iter 20 value 108.697960
## iter 30 value 108.167215
## iter 40 value 106.767132
## iter 50 value 105.767496
## iter 60 value 105.250709
## iter 70 value 105.117014
## iter 80 value 104.724324
## iter 90 value 104.228610
## iter 100 value 101.442802
## final value 101.442802
## stopped after 100 iterations
## # weights: 771
## initial value 1450.708239
## iter 10 value 110.050575
## iter 20 value 105.662264
## iter 30 value 104.901939
## iter 40 value 103.997111
## iter 50 value 97.951029
## iter 60 value 86.907843
## iter 70 value 81.635697
## iter 80 value 81.370504
## iter 90 value 81.158073
## iter 100 value 79.947698
## final value 79.947698
## stopped after 100 iterations
## # weights: 221
## initial value 831.993347
## iter 10 value 110.489318
## iter 20 value 110.022006
## iter 30 value 106.597803
## iter 40 value 105.663163
## iter 50 value 105.577736
## iter 60 value 105.289331
## iter 70 value 105.250240
## iter 80 value 105.249290
## final value 105.243768
## converged
## # weights: 331
## initial value 2233.767871
## iter 10 value 203.852069
## iter 20 value 115.899507
## iter 30 value 109.116812
## iter 40 value 108.505379
## iter 50 value 106.333694
## iter 60 value 106.211646
## iter 70 value 104.355052
## iter 80 value 103.433913
## iter 90 value 102.240148
## iter 100 value 94.197182
## final value 94.197182
## stopped after 100 iterations
## # weights: 551
## initial value 1563.857258
## iter 10 value 111.270549
## iter 20 value 110.792524
## iter 30 value 108.660709
## iter 40 value 108.601860
## iter 50 value 108.192868
## iter 60 value 107.524965
## iter 70 value 107.132521
## iter 80 value 106.259084
## iter 90 value 105.821881
## iter 100 value 105.590769
## final value 105.590769
## stopped after 100 iterations
## # weights: 771
## initial value 4751.830627
## iter 10 value 110.264762
## iter 20 value 109.470304
## iter 30 value 106.304656
## iter 40 value 105.491086
## iter 50 value 105.025467
## iter 60 value 104.279570
## iter 70 value 100.033385
## iter 80 value 98.683845
## iter 90 value 95.235222
## iter 100 value 85.113444
## final value 85.113444
## stopped after 100 iterations
## # weights: 221
## initial value 2009.149001
## iter 10 value 116.051987
## iter 20 value 113.549227
## iter 30 value 111.324634
## iter 40 value 110.200185
## iter 50 value 110.129455
## iter 60 value 109.855425
## iter 70 value 107.799523
## iter 80 value 106.635389
## iter 90 value 106.617977
## final value 106.617520
## converged
## # weights: 331
## initial value 2182.015305
## iter 10 value 147.402476
## iter 20 value 109.363777
## iter 30 value 107.769240
## iter 40 value 106.497484
## iter 50 value 106.463973
## iter 60 value 106.454040
## iter 70 value 106.367251
## iter 80 value 106.330648
## iter 90 value 106.328732
## final value 106.328704
## converged
## # weights: 551
## initial value 3027.433426
## iter 10 value 130.841226
## iter 20 value 110.518161
## iter 30 value 109.410107
## iter 40 value 108.132537
## iter 50 value 103.807011
## iter 60 value 103.695580
## iter 70 value 103.684989
## final value 103.684948
## converged
## # weights: 771
## initial value 3101.627390
## iter 10 value 112.425151
## iter 20 value 108.178211
## iter 30 value 107.365002
## iter 40 value 106.911531
## iter 50 value 105.088015
## iter 60 value 103.855799
## iter 70 value 102.152717
## iter 80 value 100.905060
## iter 90 value 98.636805
## iter 100 value 92.694069
## final value 92.694069
## stopped after 100 iterations
## # weights: 221
## initial value 1305.010691
## iter 10 value 162.404838
## iter 20 value 160.072617
## iter 30 value 159.254733
## iter 40 value 158.669017
## iter 50 value 158.663626
## iter 60 value 158.662565
## iter 70 value 158.662179
## iter 70 value 158.662177
## iter 70 value 158.662177
## final value 158.662177
## converged
# Display the tuning/resampling results of the node-1 TDA-assisted network.
Adult_TDA_PC_5.40.5_n1_NN1Fit0
## Neural Network
##
## 3373 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 2248, 2249, 2249
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9922918 0
## 2 0.5 0.9922918 0
## 2 0.7 0.9922918 0
## 3 0.3 0.9922918 0
## 3 0.5 0.9922918 0
## 3 0.7 0.9922918 0
## 5 0.3 0.9922918 0
## 5 0.5 0.9922918 0
## 5 0.7 0.9922918 0
## 7 0.3 0.9922918 0
## 7 0.5 0.9922918 0
## 7 0.7 0.9922918 0
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
# Per-fold accuracy and kappa recorded for the node-1 TDA-assisted network.
Adult_TDA_PC_5.40.5_n1_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9928826 0 Fold3
## 2 0.9919929 0 Fold2
## 3 0.9920000 0 Fold1
# Keep just the accuracy column (still a one-column data frame) for the
# fold-wise comparison against the baseline network.
ad_tda_pc_5.40.5_n1_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n1_NN1Fit0[["resample"]]["Accuracy"]
# Print the weights of the final fitted network.
summary(Adult_TDA_PC_5.40.5_n1_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.01 0.30 0.00 0.00 0.01 0.00 0.01 0.01
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.03 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.02 -0.03 0.01
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.01 0.00 0.00 0.00 -0.03 0.00 0.00 0.01
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.01 0.01 0.00 0.00 0.00 0.00 -0.02 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.01 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.01 0.00 0.01
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.04 -0.01 0.05 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.01 0.00 0.00
## b->o h1->o h2->o
## 2.29 2.29 0.81
# Variable-importance plot for the node-1 TDA-assisted network, limited to
# 25 features. The plot title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.40.5_n1_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n1_NN1Fit TDA-Assisted NN")

# Score the held-out test set with the node-1 TDA-assisted network.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predicted classes against the observed test labels.
ad_tda_pc_5.40.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): this displays the same confusion matrix a second time; the
# statement only repeats the printout and changes no state.
ad_tda_pc_5.40.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-1 TDA-assisted confusion matrix.
ad_tda_pc_5.40.5_n1_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Overall accuracy, kept as a named length-one vector.
ad_tda_pc_5.40.5_n1_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n1_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics (computed for the 'positive' class).
ad_tda_pc_5.40.5_n1_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.0000000 1.0000000 NaN
## Neg Pred Value Precision Recall
## 0.2407862 NA 0.0000000
## F1 Prevalence Detection Rate
## NA 0.7592138 0.0000000
## Detection Prevalence Balanced Accuracy
## 0.0000000 0.5000000
# Precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_pc_5.40.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted NN vs. TDA-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy gap: baseline network minus node-1 TDA-assisted network
# (a negative value means the TDA-assisted fit scored higher on that row).
# NOTE(review): rows are paired by position, and the two fits were resampled
# on different data (22793 vs 3373 samples) -- confirm pairing is intended.
diff_tda_pca_5.40.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n1_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n1_3_fold
## Accuracy
## 1 -0.1429418
## 2 -0.1990210
## 3 -0.1749429
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the fold-wise differences. The result holds probLeft,
# probRope and probRight; -0.01 and 0.01 are presumably the ROPE limits
# (confirm against the function definition).
bst_tda_pca_5.40.5_nn1.n1_3_fold <- diff_tda_pca_5.40.5_nn1_n1_3_fold |>
  as.matrix() |>
  BayesianSignTest(-0.01, 0.01)
bst_tda_pca_5.40.5_nn1.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
# The ratio is Inf whenever probRight is 0 (as in the recorded output).
bst_tda_pca_5.40.5_nn1.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_nn1.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test on the fold-wise differences. The result holds
# winLeft, winRope and winRight; -0.01 and 0.01 are presumably the ROPE
# limits (confirm against the function definition).
bsr_tda_pca_5.40.5_nn1.n1_3_fold <- diff_tda_pca_5.40.5_nn1_n1_3_fold |>
  as.matrix() |>
  BayesianSignedRank(-0.01, 0.01)
bsr_tda_pca_5.40.5_nn1.n1_3_fold
## $winLeft
## [1] 0.9918667
##
## $winRope
## [1] 0.008133333
##
## $winRight
## [1] 0
# Bayesian Correlated Test on the fold-wise differences. The result holds
# left, rope and right. 0.1 is presumably the assumed correlation and
# -0.01/0.01 the ROPE limits -- TODO confirm against the function definition.
bct_tda_pca_5.40.5_nn1.n1_3_fold <- diff_tda_pca_5.40.5_nn1_n1_3_fold |>
  as.matrix() |>
  correlatedBayesianTtest(0.1, -0.01, 0.01)
bct_tda_pca_5.40.5_nn1.n1_3_fold
## $left
## [1] 0.9934541
##
## $rope
## [1] 0.001336362
##
## $right
## [1] 0.005209553
# Rope Plot: share of the fold-wise differences falling inside [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold))
#bf_tda_pca_5.40.5_nn1.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nn1_n1_3_fold)
## t = -10.608, df = 2, p-value = 0.00877
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2421875 -0.1024163
## sample estimates:
## mean of x
## -0.1723019
### Test set diff
# Single test-set accuracy difference: nn1_cf_ov_acc minus the node-1
# TDA-assisted accuracy. A positive value means the TDA-assisted model
# scored lower on the test set.
diff_tda_pca_5.40.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n1_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n1_test
## Accuracy
## 0.5184275
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference (same -0.01 / 0.01
# bounds as for the fold differences).
bst_tda_pca_5.40.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight.
bst_tda_pca_5.40.5_nn1.n1_test_odds.left <-
  bst_tda_pca_5.40.5_nn1.n1_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n1_test[["probRight"]]
bst_tda_pca_5.40.5_nn1.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference.
bsr_tda_pca_5.40.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1583
##
## $winRight
## [1] 0.8417
# Bayesian Correlated Test on the single test-set difference.
# NOTE(review): with only one difference all three probabilities come back
# NA (see the output below) -- presumably the spread of a single value is
# undefined; confirm whether this test should be run on the test set at all.
bct_tda_pca_5.40.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n1_test)))
#BayesFactor
#bf_tda_pca_5.40.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n1_test)) #bf_tda_pca_5.40.5_nn1.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n1_test))
##With TDA PCA filter 5 intervals, 40% overlap, 5 bins (NOTE: "40%" follows the 5.40.5 object names; this header formerly read 50% -- confirm)
##Node2
##Adult_TDA_PC_5.40.5_n2_NN1Fit0 <- nnet(as.factor(adult_df1) ~ ., data = tda.m_adult_5.40.5.n2.vec, size=2, range = 0.6,, type='class')
#Neural Network 1
# Fit the NN1 network (caret method "nnet") on the node-2 TDA data set.
# The tuning grid comes from nn1Grid, the resampling scheme from fitControl,
# and the final model is chosen by accuracy.
Adult_TDA_PC_5.40.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` is forwarded through `...`; it does not look
  # like a documented train()/nnet() argument -- confirm it has any effect.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4773.379182
## iter 10 value 4573.862247
## iter 20 value 4422.199280
## iter 30 value 4195.569257
## iter 40 value 3947.376138
## iter 50 value 3903.420040
## iter 60 value 3855.397584
## iter 70 value 3774.068612
## iter 80 value 3725.873311
## iter 90 value 3697.575285
## iter 100 value 3681.810334
## final value 3681.810334
## stopped after 100 iterations
## # weights: 331
## initial value 4811.534329
## iter 10 value 4516.327001
## iter 20 value 4393.461012
## iter 30 value 4386.851485
## iter 40 value 4377.013909
## iter 50 value 4376.160326
## iter 60 value 4373.109590
## iter 70 value 4364.791141
## iter 80 value 4340.525366
## iter 90 value 4298.801950
## iter 100 value 4284.552370
## final value 4284.552370
## stopped after 100 iterations
## # weights: 551
## initial value 4859.175205
## iter 10 value 4506.902464
## iter 20 value 4481.102970
## iter 30 value 4407.885549
## iter 40 value 4395.514834
## iter 50 value 4385.493066
## iter 60 value 4383.370678
## iter 70 value 4335.136335
## iter 80 value 4334.000989
## iter 90 value 4327.772010
## iter 100 value 4317.925076
## final value 4317.925076
## stopped after 100 iterations
## # weights: 771
## initial value 4659.816325
## iter 10 value 4440.321513
## iter 20 value 4389.346615
## iter 30 value 4344.469838
## iter 40 value 4335.145320
## iter 50 value 4334.687493
## iter 60 value 4334.418156
## iter 70 value 4333.748007
## iter 80 value 4333.032004
## iter 90 value 4331.475343
## iter 100 value 4325.927923
## final value 4325.927923
## stopped after 100 iterations
## # weights: 221
## initial value 4616.343195
## iter 10 value 4578.629493
## iter 20 value 4400.889965
## iter 30 value 4397.401706
## iter 40 value 4386.098961
## iter 50 value 4383.491126
## iter 60 value 4379.290946
## iter 70 value 4378.346270
## iter 80 value 4377.889730
## iter 90 value 4377.083195
## iter 100 value 4376.035418
## final value 4376.035418
## stopped after 100 iterations
## # weights: 331
## initial value 7057.303705
## iter 10 value 4578.774283
## iter 20 value 4458.595263
## iter 30 value 4368.399393
## iter 40 value 4357.711850
## iter 50 value 4354.987641
## iter 60 value 4329.539156
## iter 70 value 4286.549563
## iter 80 value 4272.888069
## iter 90 value 4248.842678
## iter 100 value 4131.843513
## final value 4131.843513
## stopped after 100 iterations
## # weights: 551
## initial value 4995.047097
## iter 10 value 4477.276178
## iter 20 value 4455.638983
## iter 30 value 4319.471090
## iter 40 value 4308.357541
## iter 50 value 4283.242711
## iter 60 value 4205.724434
## iter 70 value 4173.539510
## iter 80 value 4120.736428
## iter 90 value 3988.239542
## iter 100 value 3877.695568
## final value 3877.695568
## stopped after 100 iterations
## # weights: 771
## initial value 5892.314142
## iter 10 value 4567.619904
## iter 20 value 4406.983745
## iter 30 value 4344.600405
## iter 40 value 4306.456495
## iter 50 value 4300.293258
## iter 60 value 4280.057034
## iter 70 value 4204.028272
## iter 80 value 4120.560136
## iter 90 value 3975.535280
## iter 100 value 3821.903844
## final value 3821.903844
## stopped after 100 iterations
## # weights: 221
## initial value 5481.571387
## iter 10 value 4565.125882
## iter 20 value 4435.418895
## iter 30 value 4421.170590
## iter 40 value 4402.514545
## iter 50 value 4290.903977
## iter 60 value 4142.893155
## iter 70 value 3987.056842
## iter 80 value 3954.378184
## iter 90 value 3873.060697
## iter 100 value 3736.662120
## final value 3736.662120
## stopped after 100 iterations
## # weights: 331
## initial value 5167.625930
## iter 10 value 4543.920596
## iter 20 value 4425.699659
## iter 30 value 4420.567451
## iter 40 value 4384.302969
## iter 50 value 4340.699921
## iter 60 value 4318.728314
## iter 70 value 4304.921397
## iter 80 value 4267.641274
## iter 90 value 4201.205850
## iter 100 value 4059.326861
## final value 4059.326861
## stopped after 100 iterations
## # weights: 551
## initial value 4677.258014
## iter 10 value 4578.176653
## iter 20 value 4415.844407
## iter 30 value 4398.886824
## iter 40 value 4395.798374
## iter 50 value 4393.942397
## iter 60 value 4375.899122
## iter 70 value 4312.346439
## iter 80 value 4285.460161
## iter 90 value 4280.519260
## iter 100 value 4276.863574
## final value 4276.863574
## stopped after 100 iterations
## # weights: 771
## initial value 4678.855412
## iter 10 value 4547.429166
## iter 20 value 4378.731244
## iter 30 value 4368.107494
## iter 40 value 4358.928862
## iter 50 value 4332.035149
## iter 60 value 4322.773138
## iter 70 value 4301.916958
## iter 80 value 4281.476377
## iter 90 value 4238.906771
## iter 100 value 4230.370126
## final value 4230.370126
## stopped after 100 iterations
## # weights: 221
## initial value 6832.111609
## iter 10 value 4553.000331
## iter 20 value 4534.804671
## iter 30 value 4395.579183
## iter 40 value 4381.684346
## final value 4381.681933
## converged
## # weights: 331
## initial value 5749.589992
## iter 10 value 4578.971771
## iter 20 value 4578.690670
## iter 30 value 4473.884516
## iter 40 value 4383.692124
## iter 50 value 4378.782016
## iter 60 value 4369.362247
## iter 70 value 4358.462098
## iter 80 value 4305.828352
## iter 90 value 4224.475100
## iter 100 value 4145.055475
## final value 4145.055475
## stopped after 100 iterations
## # weights: 551
## initial value 5292.533345
## iter 10 value 4433.272933
## iter 20 value 4390.867691
## iter 30 value 4389.804563
## iter 40 value 4312.302249
## iter 50 value 4273.190113
## iter 60 value 4264.515196
## iter 70 value 4262.091358
## iter 80 value 4256.335181
## iter 90 value 4252.670130
## iter 100 value 4248.097698
## final value 4248.097698
## stopped after 100 iterations
## # weights: 771
## initial value 4714.350141
## iter 10 value 4541.911043
## iter 20 value 4386.160148
## iter 30 value 4362.557462
## iter 40 value 4359.168329
## iter 50 value 4358.452600
## iter 60 value 4354.312594
## iter 70 value 4271.556184
## iter 80 value 4199.460175
## iter 90 value 4133.767243
## iter 100 value 4126.989203
## final value 4126.989203
## stopped after 100 iterations
## # weights: 221
## initial value 4859.537003
## iter 10 value 4483.120924
## iter 20 value 4387.428303
## iter 30 value 4385.359203
## iter 40 value 4374.733744
## iter 50 value 4340.685243
## iter 60 value 4272.647781
## iter 70 value 4205.327309
## iter 80 value 4148.969608
## iter 90 value 4077.799250
## iter 100 value 3957.466331
## final value 3957.466331
## stopped after 100 iterations
## # weights: 331
## initial value 5962.177139
## iter 10 value 4553.355943
## iter 20 value 4412.737951
## iter 30 value 4371.039169
## iter 40 value 4370.235871
## iter 50 value 4355.582810
## iter 60 value 4307.617631
## iter 70 value 4257.766846
## iter 80 value 4240.867806
## iter 90 value 4208.851579
## iter 100 value 4196.627102
## final value 4196.627102
## stopped after 100 iterations
## # weights: 551
## initial value 5044.588995
## iter 10 value 4472.690821
## iter 20 value 4423.382430
## iter 30 value 4329.689220
## iter 40 value 4323.114531
## iter 50 value 4320.589367
## iter 60 value 4316.891088
## iter 70 value 4313.329775
## iter 80 value 4310.524059
## iter 90 value 4238.416789
## iter 100 value 4219.600404
## final value 4219.600404
## stopped after 100 iterations
## # weights: 771
## initial value 4722.981089
## iter 10 value 4574.914100
## iter 20 value 4364.144824
## iter 30 value 4320.462400
## iter 40 value 4304.876400
## iter 50 value 4280.976530
## iter 60 value 4271.134943
## iter 70 value 4257.822495
## iter 80 value 4253.925701
## iter 90 value 4249.226413
## iter 100 value 4234.692118
## final value 4234.692118
## stopped after 100 iterations
## # weights: 221
## initial value 6219.281095
## iter 10 value 4569.094634
## iter 20 value 4441.055019
## iter 30 value 4377.237782
## iter 40 value 4327.533995
## iter 50 value 4283.239315
## iter 60 value 4262.744263
## iter 70 value 4258.689661
## iter 80 value 4255.431722
## iter 90 value 4241.516085
## iter 100 value 4193.733658
## final value 4193.733658
## stopped after 100 iterations
## # weights: 331
## initial value 4695.736697
## iter 10 value 4474.773951
## iter 20 value 4354.946419
## iter 30 value 4329.092426
## iter 40 value 4315.455269
## iter 50 value 4310.761826
## iter 60 value 4310.416498
## iter 70 value 4310.352843
## iter 80 value 4307.713727
## iter 90 value 4300.824696
## iter 100 value 4246.176570
## final value 4246.176570
## stopped after 100 iterations
## # weights: 551
## initial value 4951.325686
## iter 10 value 4555.671139
## iter 20 value 4393.185024
## iter 30 value 4299.391947
## iter 40 value 4272.332261
## iter 50 value 4255.963684
## iter 60 value 4254.470936
## iter 70 value 4248.365320
## iter 80 value 4242.363659
## iter 90 value 4215.625004
## iter 100 value 4131.616526
## final value 4131.616526
## stopped after 100 iterations
## # weights: 771
## initial value 4995.921172
## iter 10 value 4558.957036
## iter 20 value 4404.141057
## iter 30 value 4377.228778
## iter 40 value 4373.623531
## iter 50 value 4372.872522
## iter 60 value 4367.760492
## iter 70 value 4365.508721
## iter 80 value 4301.135187
## iter 90 value 4258.063902
## iter 100 value 4082.929451
## final value 4082.929451
## stopped after 100 iterations
## # weights: 221
## initial value 4697.511328
## iter 10 value 4380.950668
## iter 20 value 4336.729260
## iter 30 value 4275.191257
## iter 40 value 4225.520283
## iter 50 value 4153.684907
## iter 60 value 3903.150164
## iter 70 value 3774.072868
## iter 80 value 3718.120586
## iter 90 value 3679.214207
## iter 100 value 3649.695949
## final value 3649.695949
## stopped after 100 iterations
## # weights: 331
## initial value 4599.888499
## iter 10 value 4519.975025
## iter 20 value 4406.677936
## iter 30 value 4403.396172
## iter 40 value 4402.992786
## iter 50 value 4392.463114
## iter 60 value 4389.901480
## iter 70 value 4387.234680
## iter 80 value 4385.165207
## iter 90 value 4383.556294
## iter 100 value 4378.063055
## final value 4378.063055
## stopped after 100 iterations
## # weights: 551
## initial value 5440.404732
## iter 10 value 4549.130620
## iter 20 value 4397.150438
## iter 30 value 4391.197912
## iter 40 value 4378.952086
## iter 50 value 4330.150196
## iter 60 value 4319.792731
## iter 70 value 4307.641637
## iter 80 value 4286.033743
## iter 90 value 4267.214390
## iter 100 value 4193.003651
## final value 4193.003651
## stopped after 100 iterations
## # weights: 771
## initial value 4639.208815
## iter 10 value 4414.515064
## iter 20 value 4350.671968
## iter 30 value 4258.974945
## iter 40 value 4251.454417
## iter 50 value 4241.434615
## iter 60 value 4187.853729
## iter 70 value 4111.386165
## iter 80 value 3979.772354
## iter 90 value 3879.682819
## iter 100 value 3768.818713
## final value 3768.818713
## stopped after 100 iterations
## # weights: 221
## initial value 4926.176857
## iter 10 value 4559.509528
## iter 20 value 4534.342183
## iter 30 value 4534.232315
## final value 4534.232243
## converged
## # weights: 331
## initial value 6102.832679
## iter 10 value 4578.801481
## iter 20 value 4578.591163
## iter 30 value 4384.112953
## iter 40 value 4373.291937
## iter 50 value 4371.932384
## iter 60 value 4363.108704
## iter 70 value 4360.574028
## iter 80 value 4319.285512
## iter 90 value 4304.409146
## iter 100 value 4240.338029
## final value 4240.338029
## stopped after 100 iterations
## # weights: 551
## initial value 5097.738552
## iter 10 value 4478.069276
## iter 20 value 4434.236068
## iter 30 value 4406.984621
## iter 40 value 4397.624476
## iter 50 value 4335.825898
## iter 60 value 4303.712333
## iter 70 value 4249.426709
## iter 80 value 4247.263253
## iter 90 value 4230.932156
## iter 100 value 4225.478552
## final value 4225.478552
## stopped after 100 iterations
## # weights: 771
## initial value 8081.060377
## iter 10 value 4691.171290
## iter 20 value 4471.418174
## iter 30 value 4364.472584
## iter 40 value 4312.302809
## iter 50 value 4281.957898
## iter 60 value 4275.473877
## iter 70 value 4271.771033
## iter 80 value 4263.638006
## iter 90 value 4245.440283
## iter 100 value 4207.403859
## final value 4207.403859
## stopped after 100 iterations
## # weights: 221
## initial value 4604.865846
## iter 10 value 4578.480010
## iter 20 value 4570.901270
## iter 30 value 4521.880368
## iter 40 value 4399.513607
## iter 50 value 4385.637429
## iter 60 value 4381.983065
## iter 70 value 4310.405644
## iter 80 value 4273.692408
## iter 90 value 4246.976082
## iter 100 value 4156.577599
## final value 4156.577599
## stopped after 100 iterations
## # weights: 331
## initial value 4888.154951
## iter 10 value 4481.277947
## iter 20 value 4415.365045
## iter 30 value 4408.254736
## iter 40 value 4407.464298
## iter 50 value 4404.005895
## iter 60 value 4403.476040
## iter 70 value 4383.063550
## iter 80 value 4350.736440
## iter 90 value 4323.261265
## iter 100 value 4249.457299
## final value 4249.457299
## stopped after 100 iterations
## # weights: 551
## initial value 8004.739127
## iter 10 value 4579.675390
## iter 20 value 4417.973158
## iter 30 value 4407.830764
## iter 40 value 4399.981308
## iter 50 value 4399.875305
## iter 60 value 4385.351351
## iter 70 value 4375.403665
## iter 80 value 4307.573317
## iter 90 value 4026.059251
## iter 100 value 3987.845128
## final value 3987.845128
## stopped after 100 iterations
## # weights: 771
## initial value 4667.715329
## iter 10 value 4415.812118
## iter 20 value 4374.879811
## iter 30 value 4352.899860
## iter 40 value 4306.365050
## iter 50 value 4296.930133
## iter 60 value 4266.457461
## iter 70 value 4244.253660
## iter 80 value 4240.544006
## iter 90 value 4238.953900
## iter 100 value 4236.683826
## final value 4236.683826
## stopped after 100 iterations
## # weights: 221
## initial value 7194.655172
## iter 10 value 6705.695442
## iter 20 value 6461.267374
## iter 30 value 6420.878980
## iter 40 value 6384.852355
## iter 50 value 6251.085130
## iter 60 value 6135.787816
## iter 70 value 6015.381926
## iter 80 value 5956.915998
## iter 90 value 5825.944075
## iter 100 value 5745.030675
## final value 5745.030675
## stopped after 100 iterations
# Print the fitted caret object: resampled Accuracy/Kappa for every
# size-decay combination and the combination selected as the final model.
Adult_TDA_PC_5.40.5_n2_NN1Fit0
## Neural Network
##
## 10276 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6851, 6851, 6850
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.6729236 0.24028243
## 2 0.5 0.6348797 0.10677450
## 2 0.7 0.6414004 0.11773175
## 3 0.3 0.6251474 0.08292598
## 3 0.5 0.6213487 0.05674173
## 3 0.7 0.6338092 0.11008402
## 5 0.3 0.6108408 0.00000000
## 5 0.5 0.6426656 0.13093242
## 5 0.7 0.6483015 0.19178607
## 7 0.3 0.6460636 0.14700438
## 7 0.5 0.6413988 0.15756464
## 7 0.7 0.6339065 0.09782916
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.3.
# Per-fold Accuracy and Kappa of the selected model.
Adult_TDA_PC_5.40.5_n2_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7095738 0.3690617 Fold3
## 2 0.6108029 0.0000000 Fold2
## 3 0.6983942 0.3517856 Fold1
# Keep the first column (Accuracy) as a one-column data frame; it feeds the
# 3-fold difference against the non-TDA network.
ad_tda_pc_5.40.5_n2_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n2_NN1Fit0[["resample"]][1]
# Architecture and fitted weights of the final network.
summary(
  Adult_TDA_PC_5.40.5_n2_NN1Fit0
)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.03 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.02 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.03 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -1.33 0.01 -0.46 -1.05 0.25 0.00 -0.89 -0.82
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.99 0.31 0.34 0.00 -0.51 -0.54 -0.45 -0.96
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## -0.34 0.29 -0.69 1.10 1.22 0.67 -0.19 0.46
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -1.35 0.00 -0.34 0.31 -0.26 -1.42 -0.52 1.70
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## -0.13 -0.54 -0.24 -0.18 -0.46 -1.32 -0.01 0.88
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## -0.48 2.14 -0.27 -0.75 -0.43 0.00 0.10 -0.20
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -0.36 -0.97 0.80 3.09 -1.51 -0.80 -0.24 -0.54
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -1.34 1.05 0.87 -3.46 -0.97 1.18 -1.32 -0.01
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 -0.01 0.20 -1.28 -0.45 1.68 0.79
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.46 0.66 -0.81 -0.04 -0.64 -1.49 -0.98 1.42
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## -0.01 0.13 0.00 0.00 0.11 -0.12 2.06 0.39
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## -1.25 0.02 -0.54 0.37 0.16 -1.20 -0.13 0.09
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## -0.33 -1.58 0.34 0.15 -0.60 -0.68 -0.21 1.56
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.66 -0.22 -0.20 0.66 0.43
## b->o h1->o h2->o
## 1.10 1.11 -3.10
# Variable-importance plot for the node-2 TDA-assisted network, limited to
# 25 features. The title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_PC_5.40.5_n2_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n2_NN1Fit TDA-Assisted NN")

# Score the held-out test data (adult.one_hot_df4Test) with the network that
# was trained on the node-2 TDA data.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels to assess
# performance on the test data.
# NOTE(review): the accuracy printed below (0.306) is far under the
# no-information rate (0.7592) -- confirm the test features line up with the
# predictors the model was trained on.
ad_tda_pc_5.40.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1023 386
## >50K 6393 1966
##
## Accuracy : 0.306
## 95% CI : (0.2969, 0.3152)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.014
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1379
## Specificity : 0.8359
## Pos Pred Value : 0.7260
## Neg Pred Value : 0.2352
## Prevalence : 0.7592
## Detection Rate : 0.1047
## Detection Prevalence : 0.1442
## Balanced Accuracy : 0.4869
##
## 'Positive' Class : <=50K
##
# Second display of the same confusion-matrix object; the output is identical
# to the print immediately above.
ad_tda_pc_5.40.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 1023 386
## >50K 6393 1966
##
## Accuracy : 0.306
## 95% CI : (0.2969, 0.3152)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.014
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1379
## Specificity : 0.8359
## Pos Pred Value : 0.7260
## Neg Pred Value : 0.2352
## Prevalence : 0.7592
## Detection Rate : 0.1047
## Detection Prevalence : 0.1442
## Balanced Accuracy : 0.4869
##
## 'Positive' Class : <=50K
##
# Overall statistics of the node-2 confusion matrix.
ad_tda_pc_5.40.5_n2_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.30599918 -0.01398009 0.29686821 0.31524595 0.75921376
## AccuracyPValue McnemarPValue
## 1.00000000 0.00000000
# Keep the first element (Accuracy) for the test-set comparison below.
ad_tda_pc_5.40.5_n2_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n2_nn1_cf0[["overall"]][1]
# Per-class statistics.
ad_tda_pc_5.40.5_n2_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.1379450 0.8358844 0.7260468
## Neg Pred Value Precision Recall
## 0.2351956 0.7260468 0.1379450
## F1 Prevalence Detection Rate
## 0.2318414 0.7592138 0.1047297
## Detection Prevalence Balanced Accuracy
## 0.1442465 0.4869147
# Elements 5:7 of `byClass` are Precision, Recall and F1 (see listing above).
ad_tda_pc_5.40.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_nn1_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted vs. tda-assisted
###### neural network (NN1) classifiers -- node 2
### 3-fold diff
# Fold-wise accuracy difference: ad_nn1_fit_re minus the node-2 TDA-assisted
# resample accuracies. Positive entries mean the TDA-assisted fit scored
# lower in that row.
# NOTE(review): rows are subtracted by position, and the node-2 resample
# table lists Fold3, Fold2, Fold1 -- confirm ad_nn1_fit_re uses that order.
diff_tda_pca_5.40.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n2_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n2_3_fold
## Accuracy
## 1 0.1403669
## 2 0.1821689
## 3 0.1186630
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the fold differences; the trailing -0.01 / 0.01 are
# presumably the lower and upper ROPE bounds (the result has a $probRope entry).
bst_tda_pca_5.40.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft / probRight.
bst_tda_pca_5.40.5_nn1.n2_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nn1.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n2_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nn1.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the same fold differences, using the same
# -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.40.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008833333
##
## $winRight
## [1] 0.9911667
# Bayesian Correlated Test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the between-fold
# correlation rho. Under the usual rho = 1/k heuristic 0.1 corresponds to
# 10-fold CV, whereas these differences come from 3 folds -- confirm.
bct_tda_pca_5.40.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n2_3_fold
## $left
## [1] 0.00912915
##
## $rope
## [1] 0.002757308
##
## $right
## [1] 0.9881135
# Rope Plot: share of the fold differences inside the [-0.01, 0.01] region.
plot(
  rope(diff_tda_pca_5.40.5_nn1_n2_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_pca_5.40.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold))
#bf_tda_pca_5.40.5_nn1.n2_3_fold
#t_test
# One-sample t-test of the fold differences against a mean of 0.
t.test(
  as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nn1_n2_3_fold)
## t = 7.8915, df = 2, p-value = 0.01568
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.0668816 0.2272509
## sample estimates:
## mean of x
## 0.1470663
### Test set diff
# Single test-set accuracy difference: nn1_cf_ov_acc minus the node-2
# TDA-assisted accuracy. A positive value means the TDA-assisted model
# scored lower on the test set.
diff_tda_pca_5.40.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n2_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n2_test
## Accuracy
## 0.4532146
## Bayesian Tests Test set diff
# Bayesian Sign Test on the single test-set difference (same -0.01 / 0.01
# bounds as for the fold differences).
bst_tda_pca_5.40.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight.
bst_tda_pca_5.40.5_nn1.n2_test_odds.left <-
  bst_tda_pca_5.40.5_nn1.n2_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nn1.n2_test[["probRight"]]
bst_tda_pca_5.40.5_nn1.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the single test-set difference.
bsr_tda_pca_5.40.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1590667
##
## $winRight
## [1] 0.8409333
# Bayesian Correlated Test on the single test-set difference.
# NOTE(review): with only one difference all three probabilities come back
# NA (see the output below) -- presumably the spread of a single value is
# undefined; confirm whether this test should be run on the test set at all.
bct_tda_pca_5.40.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n2_test)))
#BayesFactor
#bf_tda_pca_5.40.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n2_test)) #bf_tda_pca_5.40.5_nn1.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n2_test))
##With TDA PCA filter 5 intervals, 40% overlap, 5 bins (NOTE: "40%" follows the 5.40.5 object names; this header formerly read 50% -- confirm)
##Node3
#Neural Network 1
# Fit the NN1 network (caret method "nnet") on the node-3 TDA data set.
# The tuning grid comes from nn1Grid, the resampling scheme from fitControl,
# and the final model is chosen by accuracy.
Adult_TDA_PC_5.40.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` is forwarded through `...`; it does not look
  # like a documented train()/nnet() argument -- confirm it has any effect.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 3955.295507
## iter 10 value 3781.943343
## iter 20 value 3760.057532
## iter 30 value 3654.501332
## iter 40 value 3649.457264
## iter 50 value 3648.827611
## iter 60 value 3642.120553
## iter 70 value 3624.794168
## iter 80 value 3615.880371
## iter 90 value 3502.233978
## iter 100 value 3325.726199
## final value 3325.726199
## stopped after 100 iterations
## # weights: 331
## initial value 4181.416680
## iter 10 value 3850.364005
## iter 20 value 3745.159916
## iter 30 value 3737.552609
## iter 40 value 3627.031758
## iter 50 value 3593.481970
## iter 60 value 3332.411931
## iter 70 value 3010.873412
## iter 80 value 2648.391820
## iter 90 value 2593.348680
## iter 100 value 2538.454952
## final value 2538.454952
## stopped after 100 iterations
## # weights: 551
## initial value 5218.511345
## iter 10 value 3892.837730
## iter 20 value 3854.371530
## iter 30 value 3715.595410
## iter 40 value 3554.492140
## iter 50 value 3537.467117
## iter 60 value 3494.845286
## iter 70 value 3383.507163
## iter 80 value 3159.147010
## iter 90 value 2973.533466
## iter 100 value 2780.800458
## final value 2780.800458
## stopped after 100 iterations
## # weights: 771
## initial value 5698.722132
## iter 10 value 3927.905136
## iter 20 value 3894.361564
## iter 30 value 3688.134407
## iter 40 value 3656.564112
## iter 50 value 3533.001938
## iter 60 value 3452.471383
## iter 70 value 3353.692582
## iter 80 value 3020.222576
## iter 90 value 2638.843433
## iter 100 value 2612.652296
## final value 2612.652296
## stopped after 100 iterations
## # weights: 221
## initial value 5896.567481
## iter 10 value 3928.017530
## iter 20 value 3643.796812
## iter 30 value 3589.664385
## iter 40 value 3547.301092
## iter 50 value 3485.741323
## iter 60 value 3241.582642
## iter 70 value 2989.855725
## iter 80 value 2782.135961
## iter 90 value 2635.352857
## iter 100 value 2572.752285
## final value 2572.752285
## stopped after 100 iterations
## # weights: 331
## initial value 5567.537509
## iter 10 value 3812.586066
## iter 20 value 3687.558151
## iter 30 value 3643.701562
## iter 40 value 3618.142081
## iter 50 value 3578.220672
## iter 60 value 3545.890999
## iter 70 value 3509.955357
## iter 80 value 3466.087280
## iter 90 value 3403.195497
## iter 100 value 3216.321043
## final value 3216.321043
## stopped after 100 iterations
## # weights: 551
## initial value 4184.778780
## iter 10 value 3947.242389
## iter 20 value 3934.349997
## iter 30 value 3708.807366
## iter 40 value 3543.013547
## iter 50 value 3524.568237
## iter 60 value 3477.054528
## iter 70 value 3339.824447
## iter 80 value 3128.657139
## iter 90 value 3012.588971
## iter 100 value 2906.986228
## final value 2906.986228
## stopped after 100 iterations
## # weights: 771
## initial value 4227.762468
## iter 10 value 3945.756783
## iter 20 value 3818.532125
## iter 30 value 3668.774469
## iter 40 value 3649.576934
## iter 50 value 3643.491774
## iter 60 value 3577.263347
## iter 70 value 3570.988475
## iter 80 value 3563.821399
## iter 90 value 3561.826720
## iter 100 value 3507.158642
## final value 3507.158642
## stopped after 100 iterations
## # weights: 221
## initial value 4326.630909
## iter 10 value 3813.912066
## iter 20 value 3674.164419
## iter 30 value 3645.884100
## iter 40 value 3613.466391
## iter 50 value 3517.292569
## iter 60 value 3391.772524
## iter 70 value 2930.940476
## iter 80 value 2650.973297
## iter 90 value 2567.268074
## iter 100 value 2527.060518
## final value 2527.060518
## stopped after 100 iterations
## # weights: 331
## initial value 4614.450119
## iter 10 value 3734.967722
## iter 20 value 3624.396705
## iter 30 value 3585.221565
## iter 40 value 3551.099984
## iter 50 value 3547.394394
## iter 60 value 3512.050104
## iter 70 value 3504.932820
## iter 80 value 3500.826937
## iter 90 value 3493.637179
## iter 100 value 3491.744644
## final value 3491.744644
## stopped after 100 iterations
## # weights: 551
## initial value 5324.107772
## iter 10 value 3938.740484
## iter 20 value 3709.519382
## iter 30 value 3644.579210
## iter 40 value 3609.043519
## iter 50 value 3568.759201
## iter 60 value 3522.806709
## iter 70 value 3507.686808
## iter 80 value 3495.177385
## iter 90 value 3468.470583
## iter 100 value 3460.235365
## final value 3460.235365
## stopped after 100 iterations
## # weights: 771
## initial value 6275.237206
## iter 10 value 3856.578084
## iter 20 value 3816.503024
## iter 30 value 3684.670392
## iter 40 value 3634.655529
## iter 50 value 3596.094055
## iter 60 value 3557.547768
## iter 70 value 3554.934558
## iter 80 value 3547.427395
## iter 90 value 3529.601415
## iter 100 value 3497.729066
## final value 3497.729066
## stopped after 100 iterations
## # weights: 221
## initial value 4747.737032
## iter 10 value 3796.723768
## iter 20 value 3614.591542
## iter 30 value 3555.554245
## iter 40 value 3523.284586
## iter 50 value 3404.457291
## iter 60 value 3186.895069
## iter 70 value 2910.772053
## iter 80 value 2777.132646
## iter 90 value 2698.440009
## iter 100 value 2535.889528
## final value 2535.889528
## stopped after 100 iterations
## # weights: 331
## initial value 7482.730340
## iter 10 value 3930.317146
## iter 20 value 3703.098307
## iter 30 value 3682.751799
## iter 40 value 3672.177846
## iter 50 value 3661.633352
## iter 60 value 3546.369946
## iter 70 value 3032.872206
## iter 80 value 2818.943597
## iter 90 value 2770.073339
## iter 100 value 2695.789482
## final value 2695.789482
## stopped after 100 iterations
## # weights: 551
## initial value 3944.363193
## iter 10 value 3792.996170
## iter 20 value 3677.691785
## iter 30 value 3587.058376
## iter 40 value 3554.569309
## iter 50 value 3548.598752
## iter 60 value 3547.317841
## iter 70 value 3545.959171
## iter 80 value 3544.573915
## iter 90 value 3543.019260
## iter 100 value 3509.895853
## final value 3509.895853
## stopped after 100 iterations
## # weights: 771
## initial value 6531.770447
## iter 10 value 3796.164166
## iter 20 value 3724.297570
## iter 20 value 3724.297552
## iter 30 value 3675.795383
## iter 40 value 3626.931992
## iter 50 value 3618.942999
## iter 60 value 3616.305509
## iter 70 value 3605.338685
## iter 80 value 3596.098675
## iter 90 value 3583.875461
## iter 100 value 3489.357348
## final value 3489.357348
## stopped after 100 iterations
## # weights: 221
## initial value 4187.386163
## iter 10 value 3718.167102
## iter 20 value 3596.663614
## iter 30 value 3527.192912
## iter 40 value 3438.785584
## iter 50 value 3138.091158
## iter 60 value 2743.575117
## iter 70 value 2639.850778
## iter 80 value 2630.817916
## iter 90 value 2628.909675
## iter 100 value 2623.750392
## final value 2623.750392
## stopped after 100 iterations
## # weights: 331
## initial value 9739.689265
## iter 10 value 3803.338926
## iter 20 value 3756.299153
## iter 30 value 3635.902795
## iter 40 value 3634.344319
## iter 50 value 3593.593402
## iter 60 value 3154.173843
## iter 70 value 3066.967915
## iter 80 value 2952.191504
## iter 90 value 2730.607210
## iter 100 value 2599.468730
## final value 2599.468730
## stopped after 100 iterations
## # weights: 551
## initial value 5251.904838
## iter 10 value 3934.997583
## iter 20 value 3703.592295
## iter 30 value 3694.649612
## iter 40 value 3581.029540
## iter 50 value 3569.168768
## iter 60 value 3558.544011
## iter 70 value 3540.202740
## iter 80 value 3521.625668
## iter 90 value 3335.099245
## iter 100 value 2973.057238
## final value 2973.057238
## stopped after 100 iterations
## # weights: 771
## initial value 6281.345748
## iter 10 value 3748.584218
## iter 20 value 3639.374557
## iter 30 value 3555.290805
## iter 40 value 3537.312008
## iter 50 value 3494.934771
## iter 60 value 3118.754078
## iter 70 value 2901.872881
## iter 80 value 2861.873013
## iter 90 value 2706.044796
## iter 100 value 2595.700851
## final value 2595.700851
## stopped after 100 iterations
## # weights: 221
## initial value 4901.329450
## iter 10 value 3934.572498
## iter 20 value 3715.299253
## iter 30 value 3692.798794
## iter 40 value 3681.359874
## iter 50 value 3676.593144
## iter 60 value 3667.529269
## iter 70 value 3640.537480
## iter 80 value 3595.979354
## iter 90 value 3401.827402
## iter 100 value 3200.570050
## final value 3200.570050
## stopped after 100 iterations
## # weights: 331
## initial value 7197.307374
## iter 10 value 3875.718936
## iter 20 value 3870.295303
## iter 30 value 3711.787325
## iter 40 value 3710.509278
## iter 50 value 3710.388317
## iter 60 value 3704.796874
## iter 70 value 3702.938082
## iter 80 value 3594.470913
## iter 90 value 3510.090813
## iter 100 value 3323.086581
## final value 3323.086581
## stopped after 100 iterations
## # weights: 551
## initial value 4285.060878
## iter 10 value 3889.950922
## iter 20 value 3710.116519
## iter 30 value 3583.791093
## iter 40 value 3269.246460
## iter 50 value 2961.879180
## iter 60 value 2792.521613
## iter 70 value 2717.991068
## iter 80 value 2622.066945
## iter 90 value 2583.619565
## iter 100 value 2559.095879
## final value 2559.095879
## stopped after 100 iterations
## # weights: 771
## initial value 7334.765810
## iter 10 value 3940.781326
## iter 20 value 3906.811389
## iter 30 value 3714.285954
## iter 40 value 3704.454605
## final value 3702.160431
## converged
## # weights: 221
## initial value 4888.829580
## iter 10 value 3935.004455
## iter 20 value 3747.100573
## iter 30 value 3676.089487
## iter 40 value 3670.204413
## iter 50 value 3585.278163
## iter 60 value 3344.717768
## iter 70 value 3033.554009
## iter 80 value 2708.656854
## iter 90 value 2615.054621
## iter 100 value 2613.304744
## final value 2613.304744
## stopped after 100 iterations
## # weights: 331
## initial value 4334.085183
## iter 10 value 3943.363355
## iter 20 value 3943.251919
## iter 30 value 3733.605517
## iter 40 value 3640.055783
## iter 50 value 3525.366513
## iter 60 value 3153.566695
## iter 70 value 2730.433017
## iter 80 value 2593.412565
## iter 90 value 2546.627491
## iter 100 value 2494.515146
## final value 2494.515146
## stopped after 100 iterations
## # weights: 551
## initial value 8605.550174
## iter 10 value 3855.809833
## iter 20 value 3635.990200
## iter 30 value 3590.120576
## iter 40 value 3537.764303
## iter 50 value 3491.939404
## iter 60 value 3263.062398
## iter 70 value 3124.816794
## iter 80 value 3012.774463
## iter 90 value 2837.086473
## iter 100 value 2668.592924
## final value 2668.592924
## stopped after 100 iterations
## # weights: 771
## initial value 14862.213732
## iter 10 value 3724.307711
## iter 20 value 3692.242479
## iter 30 value 3689.172530
## iter 40 value 3686.733627
## iter 50 value 3686.587806
## iter 60 value 3651.056916
## iter 70 value 3644.283172
## iter 80 value 3643.375248
## final value 3643.374273
## converged
## # weights: 221
## initial value 6599.352407
## iter 10 value 3827.780922
## iter 20 value 3685.908923
## iter 30 value 3680.862875
## iter 40 value 3666.646063
## iter 50 value 3598.215805
## iter 60 value 3554.635650
## iter 70 value 3524.623732
## iter 80 value 3514.933273
## iter 90 value 3499.888326
## iter 100 value 3426.358301
## final value 3426.358301
## stopped after 100 iterations
## # weights: 331
## initial value 5013.495488
## iter 10 value 3944.603978
## iter 20 value 3922.797896
## iter 30 value 3920.346524
## iter 40 value 3748.408722
## iter 50 value 3745.008182
## iter 60 value 3688.354261
## iter 70 value 3676.446389
## iter 80 value 3671.167426
## iter 90 value 3663.930693
## iter 100 value 3385.048993
## final value 3385.048993
## stopped after 100 iterations
## # weights: 551
## initial value 8755.052937
## iter 10 value 3937.316494
## iter 20 value 3928.565749
## iter 30 value 3835.937653
## iter 40 value 3559.578989
## iter 50 value 3488.036573
## iter 60 value 3139.132235
## iter 70 value 2895.771367
## iter 80 value 2853.149149
## iter 90 value 2828.651004
## iter 100 value 2813.266486
## final value 2813.266486
## stopped after 100 iterations
## # weights: 771
## initial value 8075.865874
## iter 10 value 3926.383328
## iter 20 value 3687.995998
## iter 30 value 3668.845386
## iter 40 value 3663.927199
## iter 50 value 3658.936728
## iter 60 value 3598.938339
## iter 70 value 3538.213337
## iter 80 value 3531.879292
## iter 90 value 3529.496735
## iter 100 value 3506.078179
## final value 3506.078179
## stopped after 100 iterations
## # weights: 221
## initial value 5292.934413
## iter 10 value 3924.273586
## iter 20 value 3667.346976
## iter 30 value 3587.559862
## iter 40 value 3535.379395
## iter 50 value 3523.302493
## iter 60 value 3507.874777
## iter 70 value 3472.370173
## iter 80 value 3363.510020
## iter 90 value 3284.228158
## iter 100 value 2950.794861
## final value 2950.794861
## stopped after 100 iterations
## # weights: 331
## initial value 6162.505380
## iter 10 value 3939.181455
## iter 20 value 3718.328940
## iter 30 value 3675.894666
## iter 40 value 3654.751896
## iter 50 value 3611.793269
## iter 60 value 3581.715644
## iter 70 value 3544.528792
## iter 80 value 3502.421027
## iter 90 value 3321.884975
## iter 100 value 3112.164768
## final value 3112.164768
## stopped after 100 iterations
## # weights: 551
## initial value 5813.814791
## iter 10 value 3950.276468
## iter 20 value 3936.166982
## iter 30 value 3811.582479
## iter 40 value 3676.817169
## iter 50 value 3508.630339
## iter 60 value 3291.397918
## iter 70 value 3112.810345
## iter 80 value 2973.828180
## iter 90 value 2755.055078
## iter 100 value 2607.110247
## final value 2607.110247
## stopped after 100 iterations
## # weights: 771
## initial value 5703.573167
## iter 10 value 4220.397008
## iter 20 value 3718.403852
## iter 30 value 3682.490049
## iter 40 value 3666.934976
## iter 50 value 3625.270492
## iter 60 value 3588.029677
## iter 70 value 3548.035582
## iter 80 value 3520.258507
## iter 90 value 3507.837967
## iter 100 value 3486.173834
## final value 3486.173834
## stopped after 100 iterations
## # weights: 331
## initial value 8413.774862
## iter 10 value 5905.959520
## iter 20 value 5614.470453
## iter 30 value 5571.650843
## iter 40 value 5536.440739
## iter 50 value 5359.220766
## iter 60 value 5321.698322
## iter 70 value 5281.724945
## iter 80 value 5242.088053
## iter 90 value 5144.087085
## iter 100 value 4904.167315
## final value 4904.167315
## stopped after 100 iterations
# Show the tuning summary of the n3 TDA/PCA neural-network fit.
Adult_TDA_PC_5.40.5_n3_NN1Fit0
## Neural Network
##
## 11563 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7709, 7708, 7709
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8470968 0.4414374
## 2 0.5 0.8555725 0.4751267
## 2 0.7 0.8512483 0.4382265
## 3 0.3 0.8627520 0.5578521
## 3 0.5 0.8343827 0.3412189
## 3 0.7 0.8305804 0.3670681
## 5 0.3 0.8428640 0.4573135
## 5 0.5 0.8525477 0.5260171
## 5 0.7 0.8522859 0.4656912
## 7 0.3 0.8332623 0.3240830
## 7 0.5 0.8399178 0.3521745
## 7 0.7 0.8209806 0.2473099
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold accuracy and kappa recorded for the selected tuning values.
Adult_TDA_PC_5.40.5_n3_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8632590 0.5442643 Fold3
## 2 0.8612192 0.5703556 Fold2
## 3 0.8637779 0.5589364 Fold1
# Keep only the Accuracy column (still a one-column data frame) so it can be
# subtracted fold-by-fold from another fit's accuracies.
ad_tda_pc_5.40.5_n3_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n3_NN1Fit0[["resample"]]["Accuracy"]
# Print the architecture and weights of the final network.
summary(Adult_TDA_PC_5.40.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -1.21 -0.05 -0.65 -1.49 -0.53 0.00 0.64 -0.01
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 2.74 -1.82 -0.09 0.00 2.65 -4.00 -2.40 -0.08
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## -0.02 -0.88 0.49 1.06 0.27 2.44 -0.75 2.78
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## -1.29 -0.29 0.44 -1.62 -0.13 1.16 -1.58 2.48
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.29 0.10 -1.45 -1.63 -0.65 -0.84 -0.16 0.91
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## -1.17 1.22 2.42 1.00 -3.12 0.05 -1.05 1.25
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -0.93 -1.14 1.02 2.94 0.37 -3.30 -0.19 -2.37
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 1.34 -1.89 -0.76 -3.22 0.13 4.53 -3.42 2.22
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 -0.04 0.02 -0.31 0.06 -1.37 -0.21
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.14 0.37 -0.04 -1.42 -0.24 -0.36 -0.09 -1.17
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## -0.25 0.12 0.00 0.00 -0.64 -0.33 0.99 1.12
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.35 -0.26 0.02 1.76 -0.50 -0.71 0.34 -0.42
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.12 -1.69 -1.41 -0.22 0.07 -0.56 1.81 -0.52
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## -0.68 1.31 2.48 1.21 0.12
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.01 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 -0.02 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 -0.01 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o
## 0.20 -2.45 0.01 0.36
# Variable-importance plot for the n3 network, limited to the 25
# highest-ranked predictors.
vip(Adult_TDA_PC_5.40.5_n3_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n3_NN1Fit TDA-Assisted NN")

# Score the rows of adult.one_hot_df4Test with the n3 network.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed adult_df1 labels of the
# test data and print the resulting statistics.
# NOTE(review): the rendered output reports test accuracy 0.5026, below the
# no-information rate of 0.7592, while cross-validated accuracy was 0.8628 --
# confirm the test frame carries the same features the model was trained on.
ad_tda_pc_5.40.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test[["adult_df1"]])
)
ad_tda_pc_5.40.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4316 1759
## >50K 3100 593
##
## Accuracy : 0.5026
## 95% CI : (0.4926, 0.5125)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1389
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5820
## Specificity : 0.2521
## Pos Pred Value : 0.7105
## Neg Pred Value : 0.1606
## Prevalence : 0.7592
## Detection Rate : 0.4419
## Detection Prevalence : 0.6219
## Balanced Accuracy : 0.4171
##
## 'Positive' Class : <=50K
##
# Print the n3 test-set confusion matrix once more (same object and same
# output as the print directly after it was created).
print(ad_tda_pc_5.40.5_n3_nn1_cf0)
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 4316 1759
## >50K 3100 593
##
## Accuracy : 0.5026
## 95% CI : (0.4926, 0.5125)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1389
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5820
## Specificity : 0.2521
## Pos Pred Value : 0.7105
## Neg Pred Value : 0.1606
## Prevalence : 0.7592
## Detection Rate : 0.4419
## Detection Prevalence : 0.6219
## Balanced Accuracy : 0.4171
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the n3 confusion matrix.
ad_tda_pc_5.40.5_n3_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.025594e-01 -1.388585e-01 4.925933e-01 5.125239e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 2.355701e-82
# Store the Accuracy entry (first element of `overall`, kept as a named value).
ad_tda_pc_5.40.5_n3_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_pc_5.40.5_n3_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.5819849 0.2521259 0.7104527
## Neg Pred Value Precision Recall
## 0.1605741 0.7104527 0.5819849
## F1 Prevalence Detection Rate
## 0.6398340 0.7592138 0.4418509
## Detection Prevalence Balanced Accuracy
## 0.6219287 0.4170554
# Keep Precision, Recall and F1, i.e. entries 5 to 7 of `byClass`.
ad_tda_pc_5.40.5_n3_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN1 vs. tda-assisted NN1 classifiers
# NOTE(review): this header used to say "RF"; the objects compared below are
# the nn1 fits, so it is worded as NN1 here -- confirm.
### 3-fold diff
# Per-fold accuracy difference: ad_nn1_fit_re minus the n3 TDA-assisted fit,
# so negative values mean the n3 TDA-assisted fit scored higher on that row.
# NOTE(review): rows are paired by position, and the n3 resample table lists
# Fold3, Fold2, Fold1 -- confirm ad_nn1_fit_re uses the same fold order.
diff_tda_pca_5.40.5_nn1_n3_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n3_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n3_3_fold
## Accuracy
## 1 -0.01331819
## 2 -0.06824736
## 3 -0.04672077
## Bayesian Tests 3-fold diff
# All three tests receive the fold differences as a matrix together with the
# bounds -0.01 and 0.01.
# Bayesian Sign Test
bst_tda_pca_5.40.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold), -0.01, 0.01
)
bst_tda_pca_5.40.5_nn1.n3_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight; it evaluates to Inf here because probRight is 0.
bst_tda_pca_5.40.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_nn1.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nn1.n3_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold), -0.01, 0.01
)
bsr_tda_pca_5.40.5_nn1.n3_3_fold
## $winLeft
## [1] 0.9630667
##
## $winRope
## [1] 0.03693333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): if the second argument (0.1) is the between-fold correlation,
# it would usually be 1/k for k-fold resampling; the fit above used 3 folds --
# confirm the intended value.
bct_tda_pca_5.40.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_nn1.n3_3_fold
## $left
## [1] 0.8911024
##
## $rope
## [1] 0.05707951
##
## $right
## [1] 0.05181807
# Rope Plot
# Plot the ROPE summary of the fold differences for the range [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_nn1_n3_3_fold, range = c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold))
#bf_tda_pca_5.40.5_nn1.n3_3_fold
# One-sample t-test of the fold differences against a mean of 0.
t.test(as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nn1_n3_3_fold)
## t = -2.676, df = 2, p-value = 0.1159
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.11151740 0.02599319
## sample estimates:
## mean of x
## -0.04276211
### Test set diff
# Test-set accuracy difference: nn1_cf_ov_acc minus the n3 TDA-assisted
# accuracy, so a positive value means nn1_cf_ov_acc is the larger of the two.
diff_tda_pca_5.40.5_nn1.n3_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n3_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n3_test
## Accuracy
## 0.2566544
## Bayesian Tests Test set diff
# NOTE(review): the input here is a single accuracy difference, so every test
# below runs on one observation; the correlated test prints NA for all three
# regions in the rendered output.
# Bayesian Sign Test
bst_tda_pca_5.40.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n3_test), -0.01, 0.01
)
bst_tda_pca_5.40.5_nn1.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio probLeft / probRight of the sign-test result.
bst_tda_pca_5.40.5_nn1.n3_test_odds.left <- with(
  bst_tda_pca_5.40.5_nn1.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nn1.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n3_test), -0.01, 0.01
)
bsr_tda_pca_5.40.5_nn1.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1626
##
## $winRight
## [1] 0.8374
# Bayesian Correlated Test
bct_tda_pca_5.40.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n3_test), 0.1, -0.01, 0.01
)
bct_tda_pca_5.40.5_nn1.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n3_test))
#BayesFactor
#bf_tda_pca_5.40.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n3_test))
#bf_tda_pca_5.40.5_nn1.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n3_test))
## Node 4
# Neural Network 1
# Tune an nnet classifier for as.factor(adult_df1) on the n4 data, using every
# other column as a predictor, the resampling settings in fitControl and the
# tuning grid in nn1Grid; the winning model is selected on Accuracy.
# NOTE(review): `Importance` is forwarded through `...` to the underlying
# fit -- confirm that fit actually takes an argument of this name.
# NOTE(review): most fits in the training trace stop at the 100-iteration cap
# rather than converging -- confirm whether a higher iteration limit is wanted.
Adult_TDA_PC_5.40.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 8284.101886
## iter 10 value 1741.933557
## iter 20 value 1741.831713
## iter 30 value 1741.770934
## iter 40 value 1741.455747
## iter 50 value 1741.415556
## iter 60 value 1632.821214
## iter 70 value 1573.412172
## iter 80 value 1521.550168
## iter 90 value 1507.379563
## iter 100 value 1497.034421
## final value 1497.034421
## stopped after 100 iterations
## # weights: 331
## initial value 13379.405334
## iter 10 value 1745.024175
## iter 20 value 1743.367144
## iter 30 value 1743.344910
## iter 30 value 1743.344898
## iter 40 value 1641.981735
## iter 50 value 1599.739199
## iter 60 value 1596.743822
## iter 70 value 1586.703835
## iter 80 value 1534.175627
## iter 90 value 1513.221771
## iter 100 value 1502.225402
## final value 1502.225402
## stopped after 100 iterations
## # weights: 551
## initial value 13037.268847
## iter 10 value 1798.909989
## iter 20 value 1532.142558
## iter 30 value 1515.936116
## iter 40 value 1502.979483
## iter 50 value 1494.317317
## iter 60 value 1479.584409
## iter 70 value 1443.432370
## iter 80 value 1357.942722
## iter 90 value 1334.371734
## iter 100 value 1309.199627
## final value 1309.199627
## stopped after 100 iterations
## # weights: 771
## initial value 8717.667487
## iter 10 value 1831.675739
## iter 20 value 1744.362147
## iter 30 value 1737.535963
## iter 40 value 1707.653412
## iter 50 value 1664.337423
## iter 60 value 1535.088197
## iter 70 value 1521.696727
## iter 80 value 1518.405469
## iter 90 value 1515.852314
## iter 100 value 1497.583281
## final value 1497.583281
## stopped after 100 iterations
## # weights: 221
## initial value 9054.931299
## iter 10 value 1743.305023
## iter 20 value 1742.802888
## iter 30 value 1709.039006
## iter 40 value 1603.048099
## iter 50 value 1597.357999
## iter 60 value 1591.221960
## iter 70 value 1586.795105
## iter 80 value 1558.891198
## iter 90 value 1523.562225
## iter 100 value 1502.719165
## final value 1502.719165
## stopped after 100 iterations
## # weights: 331
## initial value 4162.877348
## iter 10 value 1743.796344
## iter 20 value 1742.832176
## iter 30 value 1742.797385
## iter 40 value 1742.038304
## iter 50 value 1741.993324
## final value 1741.991655
## converged
## # weights: 551
## initial value 4797.235452
## iter 10 value 1616.808905
## iter 20 value 1594.029675
## iter 30 value 1593.599211
## iter 40 value 1592.610226
## iter 50 value 1589.006188
## iter 60 value 1585.300670
## iter 70 value 1581.193765
## iter 80 value 1544.250539
## iter 90 value 1480.872916
## iter 100 value 1401.340464
## final value 1401.340464
## stopped after 100 iterations
## # weights: 771
## initial value 10450.582586
## iter 10 value 1594.271255
## iter 20 value 1591.275380
## final value 1591.271172
## converged
## # weights: 221
## initial value 9118.992269
## iter 10 value 1750.483612
## iter 20 value 1745.681374
## iter 30 value 1741.836709
## iter 40 value 1740.226266
## iter 50 value 1615.072671
## iter 60 value 1569.864420
## iter 70 value 1540.148893
## iter 80 value 1527.588242
## iter 90 value 1523.528807
## iter 100 value 1501.310270
## final value 1501.310270
## stopped after 100 iterations
## # weights: 331
## initial value 5486.219496
## iter 10 value 1745.720722
## iter 20 value 1719.107421
## iter 30 value 1623.654534
## iter 40 value 1581.167787
## iter 50 value 1559.446996
## iter 60 value 1542.004760
## iter 70 value 1523.288867
## iter 80 value 1519.535383
## iter 90 value 1510.013262
## iter 100 value 1509.154054
## final value 1509.154054
## stopped after 100 iterations
## # weights: 551
## initial value 7152.124017
## iter 10 value 1839.847387
## iter 20 value 1767.138235
## iter 30 value 1696.407457
## iter 40 value 1624.869630
## iter 50 value 1573.944708
## iter 60 value 1465.805484
## iter 70 value 1400.876133
## iter 80 value 1385.108418
## iter 90 value 1377.278088
## iter 100 value 1313.188595
## final value 1313.188595
## stopped after 100 iterations
## # weights: 771
## initial value 4362.922168
## iter 10 value 1740.394404
## iter 20 value 1737.771328
## iter 30 value 1737.046480
## iter 40 value 1598.133109
## iter 50 value 1567.022739
## iter 60 value 1563.985768
## iter 70 value 1559.555313
## iter 80 value 1554.446323
## iter 90 value 1532.794400
## iter 100 value 1517.823311
## final value 1517.823311
## stopped after 100 iterations
## # weights: 221
## initial value 4612.921893
## iter 10 value 1743.619727
## iter 20 value 1693.822740
## iter 30 value 1590.430508
## iter 40 value 1588.957604
## iter 50 value 1580.699887
## iter 60 value 1579.734944
## iter 70 value 1555.211776
## iter 80 value 1539.185950
## iter 90 value 1490.299331
## iter 100 value 1481.813697
## final value 1481.813697
## stopped after 100 iterations
## # weights: 331
## initial value 11339.736302
## iter 10 value 1745.017890
## iter 20 value 1743.205043
## iter 30 value 1741.174306
## iter 40 value 1739.939820
## iter 50 value 1595.412208
## iter 60 value 1547.878534
## iter 70 value 1505.282061
## iter 80 value 1491.892752
## iter 90 value 1490.636252
## iter 100 value 1489.222173
## final value 1489.222173
## stopped after 100 iterations
## # weights: 551
## initial value 2477.254837
## iter 10 value 1741.812049
## iter 20 value 1741.352319
## iter 30 value 1741.346448
## iter 40 value 1731.787572
## iter 50 value 1731.164170
## iter 60 value 1709.630484
## iter 70 value 1680.738732
## iter 80 value 1636.099611
## iter 90 value 1635.311278
## iter 100 value 1605.864357
## final value 1605.864357
## stopped after 100 iterations
## # weights: 771
## initial value 8345.656368
## iter 10 value 1740.085251
## iter 20 value 1628.264254
## iter 30 value 1576.758074
## iter 40 value 1574.118106
## iter 50 value 1569.291948
## iter 60 value 1564.737992
## iter 70 value 1560.616325
## iter 80 value 1557.585014
## iter 90 value 1552.976149
## iter 100 value 1495.504399
## final value 1495.504399
## stopped after 100 iterations
## # weights: 221
## initial value 9154.924163
## iter 10 value 1743.333673
## iter 20 value 1742.803236
## iter 30 value 1673.033493
## iter 40 value 1582.521435
## iter 50 value 1497.519470
## iter 60 value 1401.100164
## iter 70 value 1389.837669
## iter 80 value 1371.769042
## iter 90 value 1324.529946
## iter 100 value 1309.357913
## final value 1309.357913
## stopped after 100 iterations
## # weights: 331
## initial value 5030.981513
## iter 10 value 1782.476408
## iter 20 value 1751.233309
## iter 30 value 1750.838842
## iter 40 value 1742.294436
## iter 50 value 1741.989461
## iter 60 value 1690.051486
## iter 70 value 1627.264010
## iter 80 value 1627.026351
## iter 90 value 1586.278796
## iter 100 value 1572.625017
## final value 1572.625017
## stopped after 100 iterations
## # weights: 551
## initial value 6623.695013
## iter 10 value 1751.172458
## iter 20 value 1668.168948
## iter 30 value 1666.674552
## iter 40 value 1661.707725
## iter 50 value 1575.060073
## iter 60 value 1374.325080
## iter 70 value 1306.011868
## iter 80 value 1288.024954
## iter 90 value 1266.081983
## iter 100 value 1257.870299
## final value 1257.870299
## stopped after 100 iterations
## # weights: 771
## initial value 11401.433057
## iter 10 value 1752.279968
## iter 20 value 1747.518099
## iter 30 value 1604.844103
## iter 40 value 1594.431752
## iter 50 value 1594.362582
## iter 60 value 1592.749351
## iter 70 value 1581.194503
## iter 80 value 1574.547686
## iter 90 value 1531.227621
## iter 100 value 1491.816327
## final value 1491.816327
## stopped after 100 iterations
## # weights: 221
## initial value 10036.893580
## iter 10 value 1744.252026
## iter 20 value 1743.492810
## iter 30 value 1687.271721
## iter 40 value 1582.637565
## iter 50 value 1560.279298
## iter 60 value 1540.952890
## iter 70 value 1514.406410
## iter 80 value 1500.391225
## iter 90 value 1493.698333
## iter 100 value 1492.582628
## final value 1492.582628
## stopped after 100 iterations
## # weights: 331
## initial value 6472.182982
## iter 10 value 1741.130968
## iter 20 value 1601.053501
## iter 30 value 1594.141312
## iter 40 value 1531.007705
## iter 50 value 1499.946695
## iter 60 value 1496.090398
## iter 70 value 1495.390296
## iter 80 value 1492.377135
## iter 90 value 1491.522048
## iter 100 value 1487.120362
## final value 1487.120362
## stopped after 100 iterations
## # weights: 551
## initial value 5291.608129
## iter 10 value 1749.815956
## iter 20 value 1747.562215
## iter 30 value 1675.069743
## iter 40 value 1595.672765
## iter 50 value 1586.176473
## iter 60 value 1581.554308
## iter 70 value 1581.314965
## iter 80 value 1580.520932
## iter 90 value 1560.626357
## iter 100 value 1534.260589
## final value 1534.260589
## stopped after 100 iterations
## # weights: 771
## initial value 3985.214550
## iter 10 value 1890.432999
## iter 20 value 1621.315108
## iter 30 value 1568.597547
## iter 40 value 1541.497793
## iter 50 value 1517.228594
## iter 60 value 1503.090912
## iter 70 value 1496.934151
## iter 80 value 1491.235218
## iter 90 value 1481.630209
## iter 100 value 1478.008237
## final value 1478.008237
## stopped after 100 iterations
## # weights: 221
## initial value 7808.437522
## iter 10 value 1738.839016
## iter 20 value 1738.676705
## iter 30 value 1738.241798
## final value 1738.191681
## converged
## # weights: 331
## initial value 9394.854498
## iter 10 value 1799.249914
## iter 20 value 1716.193968
## iter 30 value 1666.302170
## iter 40 value 1587.927955
## iter 50 value 1576.822746
## iter 60 value 1574.807782
## iter 70 value 1564.456941
## iter 80 value 1540.626097
## iter 90 value 1522.936511
## iter 100 value 1501.429072
## final value 1501.429072
## stopped after 100 iterations
## # weights: 551
## initial value 8498.572185
## iter 10 value 1691.944135
## iter 20 value 1675.351576
## iter 30 value 1669.294073
## iter 40 value 1568.756260
## iter 50 value 1508.466170
## iter 60 value 1482.613727
## iter 70 value 1426.176183
## iter 80 value 1418.399263
## iter 90 value 1412.219577
## iter 100 value 1370.994191
## final value 1370.994191
## stopped after 100 iterations
## # weights: 771
## initial value 10247.017237
## iter 10 value 1780.291399
## iter 20 value 1733.120530
## iter 30 value 1731.115171
## iter 40 value 1720.118606
## iter 50 value 1592.441005
## iter 60 value 1586.893544
## iter 70 value 1570.678190
## iter 80 value 1561.287248
## iter 90 value 1559.153902
## iter 100 value 1551.525084
## final value 1551.525084
## stopped after 100 iterations
## # weights: 221
## initial value 3677.363633
## iter 10 value 1742.704296
## iter 20 value 1740.761867
## iter 30 value 1697.214304
## iter 40 value 1632.665133
## iter 50 value 1585.431294
## iter 60 value 1553.667822
## iter 70 value 1545.715582
## iter 80 value 1535.227481
## iter 90 value 1520.997464
## iter 100 value 1508.979654
## final value 1508.979654
## stopped after 100 iterations
## # weights: 331
## initial value 10076.792490
## iter 10 value 1721.780765
## iter 20 value 1598.778928
## iter 30 value 1591.327918
## iter 40 value 1584.172916
## iter 50 value 1541.060349
## iter 60 value 1515.943465
## iter 70 value 1496.156592
## iter 80 value 1470.354600
## iter 90 value 1406.295853
## iter 100 value 1376.000209
## final value 1376.000209
## stopped after 100 iterations
## # weights: 551
## initial value 4184.364993
## iter 10 value 1595.337878
## iter 20 value 1584.370743
## iter 30 value 1571.663154
## iter 40 value 1557.143658
## iter 50 value 1532.705738
## iter 60 value 1523.178189
## iter 70 value 1512.438397
## iter 80 value 1509.035301
## iter 90 value 1505.492535
## iter 100 value 1497.190947
## final value 1497.190947
## stopped after 100 iterations
## # weights: 771
## initial value 7810.693106
## iter 10 value 1732.074709
## iter 20 value 1732.010288
## iter 30 value 1730.453793
## iter 40 value 1599.978031
## iter 50 value 1587.865276
## iter 60 value 1575.100196
## iter 70 value 1560.762449
## iter 80 value 1537.460961
## iter 90 value 1527.636086
## iter 100 value 1519.834041
## final value 1519.834041
## stopped after 100 iterations
## # weights: 221
## initial value 4251.718866
## iter 10 value 1762.023532
## iter 20 value 1758.511532
## iter 30 value 1744.269881
## iter 40 value 1634.068123
## iter 50 value 1599.777921
## iter 60 value 1590.670793
## iter 70 value 1586.545395
## iter 80 value 1586.470026
## iter 90 value 1585.796523
## final value 1585.788769
## converged
## # weights: 331
## initial value 7540.548839
## iter 10 value 1741.790699
## iter 20 value 1740.625390
## iter 30 value 1739.531270
## iter 40 value 1592.214287
## iter 50 value 1586.218753
## iter 60 value 1572.864181
## iter 70 value 1551.621617
## iter 80 value 1536.710594
## iter 90 value 1530.589941
## iter 100 value 1525.326081
## final value 1525.326081
## stopped after 100 iterations
## # weights: 551
## initial value 10538.882511
## iter 10 value 1627.366847
## iter 20 value 1625.245445
## iter 30 value 1625.161074
## iter 40 value 1624.451352
## iter 50 value 1605.462197
## iter 60 value 1592.175446
## iter 70 value 1584.336227
## iter 80 value 1580.576468
## iter 90 value 1557.907941
## iter 100 value 1530.839596
## final value 1530.839596
## stopped after 100 iterations
## # weights: 771
## initial value 5491.561329
## iter 10 value 1816.634243
## iter 20 value 1740.790690
## iter 30 value 1732.852738
## iter 40 value 1645.533444
## iter 50 value 1605.279358
## iter 60 value 1588.940729
## iter 70 value 1578.320640
## iter 80 value 1572.303473
## iter 90 value 1568.090418
## iter 100 value 1539.120435
## final value 1539.120435
## stopped after 100 iterations
## # weights: 221
## initial value 7750.497002
## iter 10 value 2471.012209
## iter 20 value 2351.893420
## iter 30 value 2275.001691
## iter 40 value 2233.081733
## iter 50 value 2062.292020
## iter 60 value 1994.110129
## iter 70 value 1970.597993
## iter 80 value 1932.612818
## iter 90 value 1909.993385
## iter 100 value 1884.881062
## final value 1884.881062
## stopped after 100 iterations
Adult_TDA_PC_5.40.5_n4_NN1Fit0
## Neural Network
##
## 14818 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9879, 9879, 9878
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9611961 0.1818766
## 2 0.5 0.9634903 0.2988412
## 2 0.7 0.9609261 0.1801302
## 3 0.3 0.9630854 0.2870739
## 3 0.5 0.9589687 0.1754547
## 3 0.7 0.9633554 0.2862024
## 5 0.3 0.9606557 0.1812243
## 5 0.5 0.9628154 0.2844526
## 5 0.7 0.9617358 0.2487758
## 7 0.3 0.9629505 0.2716034
## 7 0.5 0.9618030 0.1989344
## 7 0.7 0.9630179 0.2788320
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.5.
Adult_TDA_PC_5.40.5_n4_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.9643725 0.3264489 Fold3
## 2 0.9627455 0.2906926 Fold2
## 3 0.9633529 0.2793820 Fold1
# Keep the per-fold accuracy column (as a one-column data frame); it is
# differenced against the baseline network's fold accuracies further down.
ad_tda_pc_5.40.5_n4_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n4_NN1Fit0$resample["Accuracy"]
# Print the selected network's architecture and fitted weights.
summary(Adult_TDA_PC_5.40.5_n4_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -1.71 0.03 0.07 0.53 -0.48 -0.05 0.27 -0.91
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -0.42 -0.46 -0.26 0.00 -1.28 0.48 -0.28 -0.51
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## -0.52 -0.52 0.98 -0.32 -0.21 0.25 0.53 -0.46
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.29 -0.24 0.01 0.09 0.03 -0.46 0.49 0.60
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.67 -0.58 -0.75 -0.33 0.02 -0.34 -0.09 0.02
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.57 -1.38 -0.11 -0.78 -0.10 -0.98 0.53 0.35
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.21 0.20 0.18 -1.81 0.15 -0.79 -0.68 0.11
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 1.32 0.05 0.19 -0.46 -0.69 -0.79 -0.35 -1.36
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.05 0.00 0.04 0.07 -0.07 0.24 -0.28 -0.31
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## -0.53 -0.29 -0.11 -0.42 0.40 -0.31 -0.06 0.29
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.69 -0.45 0.00 -0.15 -0.21 0.21 0.03 -0.32
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.31 0.27 -0.04 0.81 0.10 -0.73 -0.17 -0.33
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.22 -0.18 -0.08 0.66 0.06 -0.19 -0.06 -0.04
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## -0.18 -0.15 0.14 -0.22 0.11
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 1.20 0.01 -0.18 0.22 -0.22 0.08 -0.12 0.48
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.03 0.59 0.31 0.00 0.44 0.55 0.13 0.22
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## -0.39 0.35 0.50 -0.04 0.05 -0.15 0.01 -0.27
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## -0.35 0.39 0.10 -0.33 0.06 0.21 -0.63 -0.45
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.39 0.33 0.30 1.04 -0.10 -1.03 0.01 0.26
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.48 0.35 0.23 0.24 0.09 0.52 0.10 -0.70
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.36 -0.46 0.84 1.05 0.11 -0.30 0.25 0.22
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.14 0.65 0.29 0.03 0.01 0.22 0.48 0.72
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.02 -0.09 0.34 -0.43 -0.62 0.11
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.33 0.21 0.05 0.23 0.33 -0.39 -0.11 0.13
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## -0.59 0.26 0.00 0.12 0.17 0.19 -0.15 0.11
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 -0.03 0.16 -0.38 0.15 0.32 0.07 0.08
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.10 0.27 0.13 0.54 -0.27 0.11 0.31 -0.26
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.05 0.10 0.26 0.15 -0.22
## b->o h1->o h2->o
## 1.81 4.19 -7.59
# Variable-importance plot for the node-4 network, limited to 25 features.
# The title previously misspelled "Assisted".
vip(Adult_TDA_PC_5.40.5_n4_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n4_NN1Fit TDA-Assisted NN")

# Score the test data with the node-4 network trained above.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the true test labels and print the resulting
# confusion matrix together with its summary statistics.
ad_tda_pc_5.40.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7402 2059
## >50K 14 293
##
## Accuracy : 0.7878
## 95% CI : (0.7795, 0.7958)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.17e-11
##
## Kappa : 0.1745
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9981
## Specificity : 0.1246
## Pos Pred Value : 0.7824
## Neg Pred Value : 0.9544
## Prevalence : 0.7592
## Detection Rate : 0.7578
## Detection Prevalence : 0.9686
## Balanced Accuracy : 0.5613
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion-matrix object; the output
# is identical to the one printed just above, so this call looks redundant.
ad_tda_pc_5.40.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7402 2059
## >50K 14 293
##
## Accuracy : 0.7878
## 95% CI : (0.7795, 0.7958)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1.17e-11
##
## Kappa : 0.1745
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9981
## Specificity : 0.1246
## Pos Pred Value : 0.7824
## Neg Pred Value : 0.9544
## Prevalence : 0.7592
## Detection Rate : 0.7578
## Detection Prevalence : 0.9686
## Balanced Accuracy : 0.5613
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n4_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.877764e-01 1.744842e-01 7.795315e-01 7.958494e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.169888e-11 0.000000e+00
# Store the overall test-set accuracy (first entry of `$overall`), then print
# the per-class statistics.
ad_tda_pc_5.40.5_n4_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_nn1_cf0$overall["Accuracy"]
ad_tda_pc_5.40.5_n4_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9981122 0.1245748 0.7823697
## Neg Pred Value Precision Recall
## 0.9543974 0.7823697 0.9981122
## F1 Prevalence Detection Rate
## 0.8771701 0.7592138 0.7577805
## Detection Prevalence Balanced Accuracy
## 0.9685708 0.5613435
# Precision, recall and F1 (entries 5 to 7 of `$byClass`); these are reported
# for the 'positive' class, which the confusion matrix lists as ' <=50K'.
ad_tda_pc_5.40.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy difference: `ad_nn1_fit_re` (defined earlier; presumably
# the non-TDA baseline network) minus the node-4 TDA-assisted network, so a
# negative value means the TDA-assisted fit scored higher on that fold.
# NOTE(review): rows are subtracted by position; confirm both resample tables
# list the folds in the same order.
diff_tda_pca_5.40.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n4_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n4_3_fold
## Accuracy
## 1 -0.1144317
## 2 -0.1697737
## 3 -0.1462958
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The trailing -0.01 / 0.01 are presumably the ROPE bounds (the result reports
# probLeft / probRope / probRight) — confirm against the helper's definition.
bst_tda_pca_5.40.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight.
# NOTE(review): probRight is 0 in the result printed above, so this division
# evaluates to Inf (and would be NaN if probLeft were 0 as well).
bst_tda_pca_5.40.5_nn1.n4_3_fold_odds.left<-bst_tda_pca_5.40.5_nn1.n4_3_fold$probLeft/bst_tda_pca_5.40.5_nn1.n4_3_fold$probRight
bst_tda_pca_5.40.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same fold differences and the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.40.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9905667
##
## $winRope
## [1] 0.009433333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument (0.1) is presumably the assumed correlation between folds;
# the last two are the same -0.01 / 0.01 bounds used above — confirm.
bct_tda_pca_5.40.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n4_3_fold
## $left
## [1] 0.9906489
##
## $rope
## [1] 0.002229626
##
## $right
## [1] 0.00712152
# Rope Plot
# Plot the fold differences against the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_nn1_n4_3_fold, c(-0.01, 0.01))
)

# BayesFactor (currently disabled)
# bf_tda_pca_5.40.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold))
# bf_tda_pca_5.40.5_nn1.n4_3_fold
# t_test: one-sample t-test of the fold differences against a mean of 0
t.test(
  as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nn1_n4_3_fold)
## t = -8.9482, df = 2, p-value = 0.01226
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.21250147 -0.07449929
## sample estimates:
## mean of x
## -0.1435004
### Test set diff
# Single test-set accuracy difference: `nn1_cf_ov_acc` (defined earlier;
# presumably the baseline network's test accuracy) minus the node-4 model's.
diff_tda_pca_5.40.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n4_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n4_test
## Accuracy
## -0.02856265
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Applied to the single test-set difference with the same -0.01 / 0.01 bounds.
bst_tda_pca_5.40.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n4_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight.
# NOTE(review): probRight is 0 in the result printed above, so this division
# evaluates to Inf (and would be NaN if probLeft were 0 as well).
bst_tda_pca_5.40.5_nn1.n4_test_odds.left<-bst_tda_pca_5.40.5_nn1.n4_test$probLeft/bst_tda_pca_5.40.5_nn1.n4_test$probRight
bst_tda_pca_5.40.5_nn1.n4_test_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Applied to the single test-set difference with the same -0.01 / 0.01 bounds.
bsr_tda_pca_5.40.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n4_test
## $winLeft
## [1] 0.8434
##
## $winRope
## [1] 0.1566
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input is a single difference and every component of the
# result printed below is NA — presumably the test needs more than one
# observation to estimate a variance; confirm before reporting this result.
bct_tda_pca_5.40.5_nn1.n4_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_nn1.n4_test),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n4_test))
#BayesFactor
#bf_tda_pca_5.40.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n4_test))
#bf_tda_pca_5.40.5_nn1.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n4_test))
##Node5
#Neural Network 1
# Tune an nnet model on the node-5 training subset through caret::train(),
# using the resampling scheme in `fitControl` and the candidates in `nn1Grid`;
# the winning combination is chosen by accuracy.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): `Importance` is not a documented argument of caret::train() or
# nnet(); it is presumably swallowed by `...` — confirm and drop if unused.
Adult_TDA_PC_5.40.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 7942.926868
## iter 10 value 355.117327
## iter 20 value 36.043137
## iter 30 value 33.066816
## iter 40 value 32.587345
## iter 50 value 32.579371
## iter 60 value 32.578074
## final value 32.578065
## converged
## # weights: 331
## initial value 3605.249233
## iter 10 value 343.463170
## iter 20 value 32.111043
## iter 30 value 31.160648
## iter 40 value 31.160194
## iter 50 value 31.160156
## iter 50 value 31.160156
## iter 50 value 31.160156
## final value 31.160156
## converged
## # weights: 551
## initial value 6692.291375
## iter 10 value 899.451147
## iter 20 value 54.019412
## iter 30 value 31.657210
## iter 40 value 29.777412
## iter 50 value 29.708946
## iter 60 value 29.708782
## final value 29.708695
## converged
## # weights: 771
## initial value 6928.993355
## iter 10 value 1068.627082
## iter 20 value 38.724245
## iter 30 value 32.636589
## iter 40 value 29.171255
## iter 50 value 29.101341
## iter 60 value 29.016079
## iter 70 value 28.970190
## iter 80 value 28.969050
## iter 90 value 28.969002
## final value 28.968998
## converged
## # weights: 221
## initial value 4956.590971
## final value 1412.873004
## converged
## # weights: 331
## initial value 7254.598730
## final value 2684.504424
## converged
## # weights: 551
## initial value 8151.370167
## final value 2991.995328
## converged
## # weights: 771
## initial value 6946.274004
## final value 3909.231087
## converged
## # weights: 221
## initial value 6320.824909
## iter 10 value 127.326555
## iter 20 value 39.697804
## final value 39.697370
## converged
## # weights: 331
## initial value 7979.306999
## iter 10 value 64.470103
## iter 20 value 39.487941
## iter 30 value 38.023631
## iter 40 value 37.099045
## iter 50 value 36.669338
## iter 60 value 36.663511
## final value 36.663510
## converged
## # weights: 551
## initial value 10433.701644
## iter 10 value 133.873403
## iter 20 value 34.786422
## iter 30 value 33.724048
## iter 40 value 33.506576
## final value 33.506522
## converged
## # weights: 771
## initial value 1164.654561
## iter 10 value 142.822485
## iter 20 value 34.922391
## iter 30 value 32.737740
## iter 40 value 32.115504
## iter 50 value 31.895831
## iter 60 value 31.877317
## iter 70 value 31.877105
## iter 80 value 31.874628
## iter 90 value 31.873795
## iter 100 value 31.873663
## final value 31.873663
## stopped after 100 iterations
## # weights: 221
## initial value 4134.356255
## iter 10 value 178.171839
## iter 20 value 31.048972
## iter 30 value 25.096713
## iter 40 value 24.973362
## final value 24.973347
## converged
## # weights: 331
## initial value 8139.144329
## iter 10 value 378.670959
## iter 20 value 25.153574
## iter 30 value 24.180283
## iter 40 value 23.564058
## iter 50 value 22.050754
## iter 60 value 21.148013
## iter 70 value 20.591139
## iter 80 value 20.585668
## iter 90 value 20.585100
## iter 100 value 20.584882
## final value 20.584882
## stopped after 100 iterations
## # weights: 551
## initial value 9911.828319
## iter 10 value 564.675646
## iter 20 value 26.173246
## iter 30 value 25.589933
## iter 40 value 21.131439
## iter 50 value 20.599504
## iter 60 value 20.474818
## iter 70 value 20.295861
## iter 80 value 20.271395
## iter 90 value 20.270515
## iter 100 value 20.270457
## final value 20.270457
## stopped after 100 iterations
## # weights: 771
## initial value 6011.031140
## iter 10 value 691.052667
## iter 20 value 25.154534
## iter 30 value 21.914138
## iter 40 value 21.766650
## iter 50 value 21.143598
## iter 60 value 21.123870
## iter 70 value 21.106468
## iter 80 value 21.099230
## iter 90 value 21.099196
## iter 100 value 21.099185
## final value 21.099185
## stopped after 100 iterations
## # weights: 221
## initial value 3413.165267
## iter 10 value 34.535856
## iter 20 value 33.362393
## iter 30 value 29.527218
## iter 40 value 28.835666
## iter 50 value 28.827513
## iter 50 value 28.827513
## iter 50 value 28.827513
## final value 28.827513
## converged
## # weights: 331
## initial value 5399.193410
## final value 1335.770475
## converged
## # weights: 551
## initial value 14582.095870
## final value 7405.284825
## converged
## # weights: 771
## initial value 1462.064049
## iter 10 value 31.523676
## iter 20 value 29.542944
## iter 30 value 29.519660
## iter 40 value 29.504048
## iter 50 value 29.497763
## iter 60 value 28.282892
## iter 70 value 25.817785
## iter 80 value 22.807741
## iter 90 value 22.704018
## iter 100 value 22.691657
## final value 22.691657
## stopped after 100 iterations
## # weights: 221
## initial value 2187.176096
## iter 10 value 123.346754
## iter 20 value 38.528755
## iter 30 value 38.477969
## iter 40 value 38.427214
## iter 50 value 34.522669
## iter 60 value 32.496792
## iter 70 value 32.472854
## iter 80 value 32.472559
## iter 80 value 32.472559
## iter 90 value 32.472536
## iter 90 value 32.472536
## iter 90 value 32.472536
## final value 32.472536
## converged
## # weights: 331
## initial value 3194.765393
## iter 10 value 50.154477
## iter 20 value 38.603315
## iter 30 value 29.422619
## iter 40 value 29.295852
## iter 50 value 29.293438
## final value 29.293408
## converged
## # weights: 551
## initial value 4362.942257
## iter 10 value 141.402010
## iter 20 value 26.642485
## iter 30 value 25.965123
## iter 40 value 25.961315
## iter 50 value 25.960095
## iter 60 value 25.960042
## final value 25.960034
## converged
## # weights: 771
## initial value 17020.503121
## iter 10 value 953.038848
## iter 20 value 41.246184
## iter 30 value 28.396451
## iter 40 value 24.292182
## iter 50 value 24.250279
## iter 60 value 24.249360
## iter 70 value 24.224923
## iter 80 value 24.223706
## iter 90 value 24.222542
## iter 100 value 24.222149
## final value 24.222149
## stopped after 100 iterations
## # weights: 221
## initial value 4438.070555
## iter 10 value 365.447216
## iter 20 value 35.362619
## iter 30 value 35.327244
## iter 40 value 32.581024
## iter 50 value 32.578935
## final value 32.578624
## converged
## # weights: 331
## initial value 8782.774609
## iter 10 value 198.270494
## iter 20 value 38.013155
## iter 30 value 32.702471
## iter 40 value 32.580038
## iter 50 value 32.572081
## iter 60 value 32.335834
## iter 70 value 31.161139
## iter 80 value 31.160740
## iter 90 value 31.160679
## final value 31.160670
## converged
## # weights: 551
## initial value 16682.229323
## iter 10 value 69.219878
## iter 20 value 33.000668
## iter 30 value 32.583488
## iter 40 value 32.530877
## iter 50 value 29.944192
## iter 60 value 29.718105
## iter 70 value 29.717408
## iter 80 value 29.717191
## iter 90 value 29.709284
## iter 100 value 29.709165
## final value 29.709165
## stopped after 100 iterations
## # weights: 771
## initial value 2608.672289
## iter 10 value 366.178080
## iter 20 value 34.528148
## iter 30 value 30.634314
## iter 40 value 30.167829
## iter 50 value 29.644330
## iter 60 value 29.291677
## iter 70 value 29.291382
## iter 80 value 29.290559
## iter 90 value 29.287989
## iter 100 value 29.277138
## final value 29.277138
## stopped after 100 iterations
## # weights: 221
## initial value 6153.663041
## iter 10 value 62.526738
## iter 20 value 36.503623
## iter 30 value 36.222367
## iter 40 value 36.221304
## final value 36.221293
## converged
## # weights: 331
## initial value 11968.898619
## final value 4149.595688
## converged
## # weights: 551
## initial value 8450.432723
## iter 10 value 591.285074
## iter 20 value 34.005089
## iter 30 value 32.599058
## iter 40 value 32.578726
## iter 50 value 32.578682
## iter 60 value 32.578626
## iter 60 value 32.578626
## final value 32.578624
## converged
## # weights: 771
## initial value 2486.245048
## final value 843.946511
## converged
## # weights: 221
## initial value 8721.894553
## iter 10 value 56.669658
## iter 20 value 39.702777
## iter 30 value 39.698177
## final value 39.698156
## converged
## # weights: 331
## initial value 2660.608222
## iter 10 value 120.655079
## iter 20 value 37.125952
## iter 30 value 36.713130
## iter 40 value 36.672620
## iter 50 value 36.666231
## iter 60 value 36.664885
## iter 70 value 36.664499
## iter 80 value 36.664274
## iter 90 value 36.664201
## final value 36.664199
## converged
## # weights: 551
## initial value 6582.131503
## iter 10 value 67.840148
## iter 20 value 56.401897
## iter 30 value 37.710357
## iter 40 value 34.399950
## iter 50 value 33.550233
## iter 60 value 33.525506
## iter 70 value 33.509170
## iter 80 value 33.508797
## iter 90 value 33.507274
## iter 100 value 33.507223
## final value 33.507223
## stopped after 100 iterations
## # weights: 771
## initial value 3745.585180
## iter 10 value 1454.933416
## iter 20 value 178.180728
## iter 30 value 40.602400
## iter 40 value 36.509414
## iter 50 value 35.151876
## iter 60 value 34.375557
## iter 70 value 33.333207
## iter 80 value 32.831726
## iter 90 value 32.664803
## iter 100 value 32.642996
## final value 32.642996
## stopped after 100 iterations
## # weights: 221
## initial value 9598.117710
## iter 10 value 336.549767
## iter 20 value 53.758139
## iter 30 value 50.343715
## iter 40 value 49.762404
## iter 50 value 49.759299
## final value 49.759282
## converged
Adult_TDA_PC_5.40.5_n5_NN1Fit0
## Neural Network
##
## 12081 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8054, 8053, 8055
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.9996689 0
## 2 0.5 0.9996689 0
## 2 0.7 0.9996689 0
## 3 0.3 0.9996689 0
## 3 0.5 0.9996689 0
## 3 0.7 0.9996689 0
## 5 0.3 0.9996689 0
## 5 0.5 0.9996689 0
## 5 0.7 0.9996689 0
## 7 0.3 0.9996689 0
## 7 0.5 0.9996689 0
## 7 0.7 0.9996689 0
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 2 and decay = 0.7.
Adult_TDA_PC_5.40.5_n5_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.9997516 0 Fold3
## 2 0.9995035 0 Fold2
## 3 0.9997517 0 Fold1
# Keep the per-fold accuracy column (as a one-column data frame); it is
# differenced against the baseline network's fold accuracies further down.
# NOTE(review): every fold printed above reports Kappa = 0, and the summary
# printed below shows all input-to-hidden weights at 0.00 — this fit looks
# degenerate; check the class balance of the node-5 subset.
ad_tda_pc_5.40.5_n5_nn1_fit_re <-
  Adult_TDA_PC_5.40.5_n5_NN1Fit0$resample["Accuracy"]
# Print the selected network's architecture and fitted weights.
summary(Adult_TDA_PC_5.40.5_n5_NN1Fit0)
## a 108-2-1 network with 221 weights
## options were - entropy fitting decay=0.7
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o
## -2.46 -2.46 -2.46
# Variable-importance plot for the node-5 network, limited to 25 features.
# The title previously misspelled "Assisted".
vip(Adult_TDA_PC_5.40.5_n5_NN1Fit0, num_features = 25) +
  ggtitle("Adult_TDA_PCA_5.40.5_n5_NN1Fit TDA-Assisted NN")

# Score the test data with the node-5 network trained above.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Compare the predictions with the true test labels and print the resulting
# confusion matrix together with its summary statistics.
ad_tda_pc_5.40.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion-matrix object; the output
# is identical to the one printed just above, so this call looks redundant.
ad_tda_pc_5.40.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n5_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Store the overall test-set accuracy (first entry of `$overall`), then print
# the per-class statistics.
ad_tda_pc_5.40.5_n5_nn1_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_nn1_cf0$overall["Accuracy"]
ad_tda_pc_5.40.5_n5_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Precision, recall and F1 (entries 5 to 7 of `$byClass`); these are reported
# for the 'positive' class, which the confusion matrix lists as ' <=50K'.
ad_tda_pc_5.40.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy difference: `ad_nn1_fit_re` (defined earlier; presumably
# the non-TDA baseline network) minus the node-5 TDA-assisted network, so a
# negative value means the TDA-assisted fit scored higher on that fold.
# NOTE(review): rows are subtracted by position; confirm both resample tables
# list the folds in the same order.
diff_tda_pca_5.40.5_nn1_n5_3_fold <-
  ad_nn1_fit_re - ad_tda_pc_5.40.5_n5_nn1_fit_re
diff_tda_pca_5.40.5_nn1_n5_3_fold
## Accuracy
## 1 -0.1498108
## 2 -0.2065316
## 3 -0.1826946
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# The trailing -0.01 / 0.01 are presumably the ROPE bounds (the result reports
# probLeft / probRope / probRight) — confirm against the helper's definition.
bst_tda_pca_5.40.5_nn1.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight.
# NOTE(review): probRight is 0 in the result printed above, so this division
# evaluates to Inf (and would be NaN if probLeft were 0 as well).
bst_tda_pca_5.40.5_nn1.n5_3_fold_odds.left<-bst_tda_pca_5.40.5_nn1.n5_3_fold$probLeft/bst_tda_pca_5.40.5_nn1.n5_3_fold$probRight
bst_tda_pca_5.40.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Same fold differences and the same -0.01 / 0.01 bounds as the sign test.
bsr_tda_pca_5.40.5_nn1.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9916333
##
## $winRope
## [1] 0.008366667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Second argument (0.1) is presumably the assumed correlation between folds;
# the last two are the same -0.01 / 0.01 bounds used above — confirm.
bct_tda_pca_5.40.5_nn1.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nn1.n5_3_fold
## $left
## [1] 0.9938545
##
## $rope
## [1] 0.001209545
##
## $right
## [1] 0.004935967
# Rope Plot
# Plot the fold differences against the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_nn1_n5_3_fold, c(-0.01, 0.01))
)

# BayesFactor (currently disabled)
# bf_tda_pca_5.40.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold))
# bf_tda_pca_5.40.5_nn1.n5_3_fold
# t_test: one-sample t-test of the fold differences against a mean of 0
t.test(
  as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nn1_n5_3_fold)
## t = -10.927, df = 2, p-value = 0.008271
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2504282 -0.1089298
## sample estimates:
## mean of x
## -0.179679
### Test set diff
# Single test-set accuracy difference: `nn1_cf_ov_acc` (defined earlier;
# presumably the baseline network's test accuracy) minus the node-5 model's.
diff_tda_pca_5.40.5_nn1.n5_test <-
  nn1_cf_ov_acc - ad_tda_pc_5.40.5_n5_nn1_cf0_ov_acc
diff_tda_pca_5.40.5_nn1.n5_test
## Accuracy
## 0
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Applied to the single test-set difference with the same -0.01 / 0.01 bounds.
bst_tda_pca_5.40.5_nn1.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nn1.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of the sign test's probLeft to its probRight.
# NOTE(review): both probLeft and probRight are 0 in the result printed above,
# so this division is 0/0 and evaluates to NaN.
bst_tda_pca_5.40.5_nn1.n5_test_odds.left<-bst_tda_pca_5.40.5_nn1.n5_test$probLeft/bst_tda_pca_5.40.5_nn1.n5_test$probRight
bst_tda_pca_5.40.5_nn1.n5_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
# Applied to the single test-set difference with the same -0.01 / 0.01 bounds.
bsr_tda_pca_5.40.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# NOTE(review): the input is a single difference and every component of the
# result printed below is NA — presumably the test needs more than one
# observation to estimate a variance; confirm before reporting this result.
bct_tda_pca_5.40.5_nn1.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_nn1.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nn1.n5_test))
#BayesFactor
#bf_tda_pca_5.40.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nn1.n5_test))
#bf_tda_pca_5.40.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nn1.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
# NOTE(review): the object names in this section use "5.40.5" while the header
# above says 50% overlap — confirm which overlap setting was actually used.
##Node1
#Neural Network 1
# Tune an nnet model on the KDE-filter node-1 training subset through
# caret::train(), using the resampling scheme in `fitControl` and the
# candidates in `nn1Grid`; the winning combination is chosen by accuracy.
# `TRUE` replaces `T`, which is an ordinary variable that can be reassigned.
# NOTE(review): `Importance` is not a documented argument of caret::train() or
# nnet(); it is presumably swallowed by `...` — confirm and drop if unused.
Adult_TDA_KDE_5.40.5_n1_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 5401.509703
## iter 10 value 4209.392862
## iter 20 value 4090.992099
## iter 30 value 4077.036532
## iter 40 value 4073.209742
## iter 50 value 4069.354646
## iter 60 value 3977.147999
## iter 70 value 3881.524185
## iter 80 value 3536.149055
## iter 90 value 3126.210017
## iter 100 value 2848.849146
## final value 2848.849146
## stopped after 100 iterations
## # weights: 331
## initial value 5016.227797
## iter 10 value 4513.087136
## iter 20 value 4380.199548
## iter 30 value 4158.012016
## iter 40 value 3993.953463
## iter 50 value 3939.766280
## iter 60 value 3917.610239
## iter 70 value 3891.811686
## iter 80 value 3831.486984
## iter 90 value 3757.846009
## iter 100 value 3644.150341
## final value 3644.150341
## stopped after 100 iterations
## # weights: 551
## initial value 5804.548891
## iter 10 value 4122.569459
## iter 20 value 4094.553122
## iter 30 value 4042.147582
## iter 40 value 4011.991556
## iter 50 value 3977.445645
## iter 60 value 3945.324982
## iter 70 value 3927.557410
## iter 80 value 3913.647728
## iter 90 value 3861.478037
## iter 100 value 3447.754778
## final value 3447.754778
## stopped after 100 iterations
## # weights: 771
## initial value 5769.132866
## iter 10 value 4490.700237
## iter 20 value 4075.390768
## iter 30 value 3998.280276
## iter 40 value 3923.024835
## iter 50 value 3878.468352
## iter 60 value 3868.009690
## iter 70 value 3852.991813
## iter 80 value 3769.402528
## iter 90 value 3682.267279
## iter 100 value 3232.453044
## final value 3232.453044
## stopped after 100 iterations
## # weights: 221
## initial value 5203.978032
## iter 10 value 4517.486787
## iter 20 value 4106.953365
## iter 30 value 4091.062191
## iter 40 value 4067.180410
## iter 50 value 4028.915330
## iter 60 value 3977.039652
## iter 70 value 3932.904661
## iter 80 value 3906.078708
## iter 90 value 3874.231834
## iter 100 value 3861.593806
## final value 3861.593806
## stopped after 100 iterations
## # weights: 331
## initial value 8326.870293
## iter 10 value 4336.703562
## iter 20 value 4321.021575
## iter 30 value 4129.240120
## iter 40 value 3655.189053
## iter 50 value 3252.601200
## iter 60 value 3062.209188
## iter 70 value 2645.697762
## iter 80 value 2550.007625
## iter 90 value 2520.596817
## iter 100 value 2503.504374
## final value 2503.504374
## stopped after 100 iterations
## # weights: 551
## initial value 4677.510831
## iter 10 value 4518.674018
## iter 20 value 4517.400647
## iter 30 value 4517.170013
## iter 40 value 4119.236731
## iter 50 value 3773.748379
## iter 60 value 3240.754068
## iter 70 value 2926.448153
## iter 80 value 2850.603812
## iter 90 value 2795.933627
## iter 100 value 2778.351183
## final value 2778.351183
## stopped after 100 iterations
## # weights: 771
## initial value 8868.164134
## iter 10 value 4896.858158
## iter 20 value 4605.557499
## iter 30 value 4439.367406
## iter 40 value 4437.321047
## iter 50 value 4151.038923
## iter 60 value 4084.656045
## iter 70 value 4046.944653
## iter 80 value 3857.740487
## iter 90 value 3545.974378
## iter 100 value 3350.686733
## final value 3350.686733
## stopped after 100 iterations
## # weights: 221
## initial value 7274.192985
## iter 10 value 4509.643380
## iter 20 value 4116.775702
## iter 30 value 4110.411886
## iter 40 value 4110.394099
## iter 50 value 4107.539700
## iter 60 value 4106.965007
## iter 70 value 4065.329814
## iter 80 value 4050.607859
## iter 90 value 3998.422368
## iter 100 value 3935.405803
## final value 3935.405803
## stopped after 100 iterations
## # weights: 331
## initial value 5303.818911
## iter 10 value 4135.172962
## iter 20 value 4068.188345
## iter 30 value 3992.490944
## iter 40 value 3906.330354
## iter 50 value 3727.262392
## iter 60 value 3661.437450
## iter 70 value 3557.395824
## iter 80 value 3197.719071
## iter 90 value 2710.053984
## iter 100 value 2579.248158
## final value 2579.248158
## stopped after 100 iterations
## # weights: 551
## initial value 6193.099263
## iter 10 value 4418.024486
## iter 20 value 4283.877598
## iter 30 value 4116.804516
## iter 40 value 4075.882975
## iter 50 value 4019.582799
## iter 60 value 3968.382834
## iter 70 value 3934.947750
## iter 80 value 3663.519631
## iter 90 value 3238.816235
## iter 100 value 3064.204919
## final value 3064.204919
## stopped after 100 iterations
## # weights: 771
## initial value 8376.891975
## iter 10 value 4229.431615
## iter 20 value 4069.391169
## iter 30 value 4042.466997
## iter 40 value 3995.127488
## iter 50 value 3977.379569
## iter 60 value 3965.117691
## iter 70 value 3960.132152
## iter 80 value 3959.223714
## iter 90 value 3949.180524
## iter 100 value 3666.190963
## final value 3666.190963
## stopped after 100 iterations
## # weights: 221
## initial value 5244.440945
## iter 10 value 4518.875879
## final value 4518.870312
## converged
## # weights: 331
## initial value 6141.747555
## iter 10 value 4167.961980
## iter 20 value 3815.233819
## iter 30 value 3538.249240
## iter 40 value 3245.493534
## iter 50 value 2779.537394
## iter 60 value 2585.838338
## iter 70 value 2525.498236
## iter 80 value 2484.360534
## iter 90 value 2475.593458
## iter 100 value 2441.458336
## final value 2441.458336
## stopped after 100 iterations
## # weights: 551
## initial value 8422.057661
## iter 10 value 4226.908024
## iter 20 value 4161.560765
## iter 30 value 4159.156196
## iter 40 value 4129.822320
## iter 50 value 4074.268662
## iter 60 value 4009.643216
## iter 70 value 3943.812714
## iter 80 value 3902.448756
## iter 90 value 3887.379055
## iter 100 value 3865.665284
## final value 3865.665284
## stopped after 100 iterations
## # weights: 771
## initial value 4495.530617
## iter 10 value 4173.565430
## iter 20 value 4084.331433
## iter 30 value 4077.540106
## iter 40 value 4068.000941
## iter 50 value 4064.269095
## iter 60 value 4056.861362
## iter 70 value 4053.977541
## iter 80 value 4002.585433
## iter 90 value 3978.762877
## iter 100 value 3945.756915
## final value 3945.756915
## stopped after 100 iterations
## # weights: 221
## initial value 5816.644453
## iter 10 value 4503.739629
## iter 20 value 4023.325542
## iter 30 value 4000.745438
## iter 40 value 3994.117802
## iter 50 value 3990.303492
## iter 60 value 3985.861515
## iter 70 value 3984.334916
## iter 80 value 3952.744490
## iter 90 value 3937.635913
## iter 100 value 3927.074353
## final value 3927.074353
## stopped after 100 iterations
## # weights: 331
## initial value 4549.744478
## iter 10 value 4314.793430
## iter 20 value 4136.258122
## iter 30 value 4111.224015
## iter 40 value 4034.908447
## iter 50 value 4027.966548
## iter 60 value 4025.686465
## iter 70 value 4011.671746
## iter 80 value 3607.720873
## iter 90 value 2979.445343
## iter 100 value 2747.472522
## final value 2747.472522
## stopped after 100 iterations
## # weights: 551
## initial value 9414.461285
## iter 10 value 4351.732208
## iter 20 value 4132.491308
## iter 30 value 4120.253340
## iter 40 value 4093.429411
## iter 50 value 4014.754993
## iter 60 value 3918.278010
## iter 70 value 3890.538778
## iter 80 value 3835.542400
## iter 90 value 3517.142862
## iter 100 value 3358.098884
## final value 3358.098884
## stopped after 100 iterations
## # weights: 771
## initial value 4572.998836
## iter 10 value 4169.298999
## iter 20 value 4124.578757
## iter 30 value 4055.451266
## iter 40 value 4051.798168
## iter 50 value 4022.222117
## iter 60 value 4019.853110
## iter 70 value 4016.166104
## iter 80 value 3975.328860
## iter 90 value 3731.953383
## iter 100 value 3312.628378
## final value 3312.628378
## stopped after 100 iterations
## # weights: 221
## initial value 5463.667175
## iter 10 value 4482.130271
## iter 20 value 4478.009045
## iter 30 value 4171.356601
## iter 40 value 4092.351745
## iter 50 value 4085.786951
## iter 60 value 4032.714830
## iter 70 value 4023.292297
## iter 80 value 4020.864125
## iter 90 value 4019.450823
## iter 100 value 3986.495455
## final value 3986.495455
## stopped after 100 iterations
## # weights: 331
## initial value 5958.066323
## iter 10 value 4314.463912
## iter 20 value 4300.233222
## iter 30 value 4135.921719
## iter 40 value 4134.939098
## iter 50 value 4114.454385
## iter 60 value 4035.420266
## iter 70 value 4027.899530
## iter 80 value 3992.677234
## iter 90 value 3880.790264
## iter 100 value 3714.367578
## final value 3714.367578
## stopped after 100 iterations
## # weights: 551
## initial value 5742.613447
## iter 10 value 4180.759619
## iter 20 value 4072.336529
## iter 30 value 4044.312988
## iter 40 value 4032.208779
## iter 50 value 3968.450301
## iter 60 value 3901.196595
## iter 70 value 3886.989076
## iter 80 value 3879.986110
## iter 90 value 3875.250159
## iter 100 value 3827.460139
## final value 3827.460139
## stopped after 100 iterations
## # weights: 771
## initial value 5763.822824
## iter 10 value 4346.960326
## iter 20 value 4156.607902
## iter 30 value 4154.086398
## iter 40 value 4152.654732
## iter 50 value 4059.838757
## iter 60 value 4013.413449
## iter 70 value 3890.857407
## iter 80 value 3741.924095
## iter 90 value 3728.315390
## iter 100 value 3714.334538
## final value 3714.334538
## stopped after 100 iterations
## # weights: 221
## initial value 6464.936578
## iter 10 value 4517.282491
## iter 20 value 4454.136226
## iter 30 value 4073.798218
## iter 40 value 4033.441494
## iter 50 value 4022.239457
## iter 60 value 4002.800718
## iter 70 value 3989.651052
## iter 80 value 3973.299930
## iter 90 value 3932.496093
## iter 100 value 3914.157623
## final value 3914.157623
## stopped after 100 iterations
## # weights: 331
## initial value 5680.108040
## iter 10 value 4515.320944
## iter 20 value 4087.363049
## iter 30 value 4064.745156
## iter 40 value 3996.792058
## iter 50 value 3960.943587
## iter 60 value 3956.733008
## iter 70 value 3942.711818
## iter 80 value 3889.150158
## iter 90 value 3848.882847
## iter 100 value 3821.836769
## final value 3821.836769
## stopped after 100 iterations
## # weights: 551
## initial value 7697.853993
## iter 10 value 4513.709758
## iter 20 value 4018.939634
## iter 30 value 4012.503320
## iter 40 value 4005.985192
## iter 50 value 4002.948530
## iter 60 value 3987.220372
## iter 70 value 3945.934511
## iter 80 value 3911.791384
## iter 90 value 3900.560115
## iter 100 value 3881.402327
## final value 3881.402327
## stopped after 100 iterations
## # weights: 771
## initial value 7546.066566
## iter 10 value 4408.243612
## iter 20 value 4131.454477
## iter 30 value 3996.321035
## iter 40 value 3991.243503
## iter 50 value 3979.399818
## iter 60 value 3972.569371
## iter 70 value 3944.479115
## iter 80 value 3903.803959
## iter 90 value 3864.480272
## iter 100 value 3791.126695
## final value 3791.126695
## stopped after 100 iterations
## # weights: 221
## initial value 5186.363507
## iter 10 value 4421.178274
## iter 20 value 4148.231385
## iter 30 value 4045.672921
## iter 40 value 4026.417823
## iter 50 value 4025.380508
## iter 60 value 3989.500768
## iter 70 value 3942.498639
## iter 80 value 3922.585845
## iter 90 value 3920.145002
## iter 100 value 3885.028438
## final value 3885.028438
## stopped after 100 iterations
## # weights: 331
## initial value 6445.654016
## iter 10 value 4449.760552
## iter 20 value 4383.141813
## iter 30 value 4253.934287
## iter 40 value 4022.807511
## iter 50 value 3946.424641
## iter 60 value 3698.810012
## iter 70 value 3062.428450
## iter 80 value 2898.385282
## iter 90 value 2643.913331
## iter 100 value 2565.033369
## final value 2565.033369
## stopped after 100 iterations
## # weights: 551
## initial value 4680.174234
## iter 10 value 4349.262932
## iter 20 value 4344.009698
## iter 30 value 4326.870286
## iter 40 value 4278.156281
## iter 50 value 3909.775433
## iter 60 value 3120.346884
## iter 70 value 2773.056463
## iter 80 value 2754.652805
## iter 90 value 2629.734806
## iter 100 value 2554.226358
## final value 2554.226358
## stopped after 100 iterations
## # weights: 771
## initial value 4786.518209
## iter 10 value 4161.597472
## iter 20 value 4146.045370
## iter 30 value 4139.933233
## iter 40 value 4128.360031
## iter 50 value 3990.735465
## iter 60 value 3715.458467
## iter 70 value 3152.117382
## iter 80 value 2898.937904
## iter 90 value 2859.129566
## iter 100 value 2686.404981
## final value 2686.404981
## stopped after 100 iterations
## # weights: 221
## initial value 6947.454858
## iter 10 value 4442.076575
## iter 20 value 4121.357880
## iter 30 value 4121.168373
## iter 40 value 4120.425164
## iter 50 value 4120.294468
## iter 60 value 4120.162678
## iter 70 value 4054.929910
## iter 80 value 3988.157623
## iter 90 value 3973.416222
## iter 100 value 3960.688778
## final value 3960.688778
## stopped after 100 iterations
## # weights: 331
## initial value 5094.584864
## iter 10 value 4518.788367
## iter 20 value 4517.650071
## iter 30 value 4489.008150
## iter 40 value 4013.618489
## iter 50 value 3962.658918
## iter 60 value 3942.477286
## iter 70 value 3923.867710
## iter 80 value 3904.019966
## iter 90 value 3822.296711
## iter 100 value 3604.228585
## final value 3604.228585
## stopped after 100 iterations
## # weights: 551
## initial value 4599.510601
## iter 10 value 4356.636166
## iter 20 value 4132.305900
## iter 30 value 4077.929608
## iter 40 value 4055.091179
## iter 50 value 4048.578910
## iter 60 value 4028.326082
## iter 70 value 4019.504154
## iter 80 value 4007.310702
## iter 90 value 3994.476132
## iter 100 value 3827.574757
## final value 3827.574757
## stopped after 100 iterations
## # weights: 771
## initial value 12055.816494
## iter 10 value 4492.939310
## iter 20 value 4342.136817
## iter 30 value 4039.468631
## iter 40 value 4015.641937
## iter 50 value 4011.436947
## iter 60 value 3993.269769
## iter 70 value 3975.575610
## iter 80 value 3719.186423
## iter 90 value 3418.065818
## iter 100 value 2924.279148
## final value 2924.279148
## stopped after 100 iterations
## # weights: 331
## initial value 6776.448754
## iter 10 value 6204.101783
## iter 20 value 6193.309695
## iter 30 value 6177.733397
## iter 40 value 6107.348436
## iter 50 value 6013.804645
## iter 60 value 6010.491073
## iter 70 value 5983.843106
## iter 80 value 5946.797970
## iter 90 value 4867.325041
## iter 100 value 4728.678549
## final value 4728.678549
## stopped after 100 iterations
# Print the fitted train object: resampling setup, the accuracy/kappa for each
# size/decay combination, and the combination that was selected
Adult_TDA_KDE_5.40.5_n1_NN1Fit0
## Neural Network
##
## 11838 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7891, 7893, 7892
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7869496 0.3071721
## 2 0.5 0.7976855 0.3489376
## 2 0.7 0.7960802 0.3277211
## 3 0.3 0.8263250 0.4828081
## 3 0.5 0.8563101 0.6216816
## 3 0.7 0.8220934 0.4522682
## 5 0.3 0.8057946 0.3964069
## 5 0.5 0.8367090 0.5805923
## 5 0.7 0.7959126 0.3193015
## 7 0.3 0.8007242 0.3742706
## 7 0.5 0.8213359 0.4926218
## 7 0.7 0.8041060 0.3823065
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.5.
# Per-fold Accuracy and Kappa stored on the fitted object
Adult_TDA_KDE_5.40.5_n1_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8573239 0.6233843 Fold3
## 2 0.8555133 0.6195209 Fold2
## 3 0.8560932 0.6221395 Fold1
# Keep the fold-wise Accuracy column (as a one-column data frame) for the
# paired comparisons further down, then print the final network's weights
ad_tda_kde_5.40.5_n1_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n1_NN1Fit0[["resample"]]["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n1_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 -0.03 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 -0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 -0.06 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.02 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.01 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 3.94 -0.18 0.07 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 2.96 -0.01 0.33 -0.50 0.83 0.00 0.35 -0.22
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.66 0.62 0.88 0.00 0.28 0.46 0.37 0.44
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## -0.03 0.39 0.37 0.68 0.37 0.18 -1.04 0.25
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## -0.21 1.15 -0.72 -0.01 -0.17 0.52 -0.05 -1.06
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 2.15 0.95 0.30 0.16 0.33 0.32 0.12 -0.23
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## -0.67 0.40 1.15 1.02 0.98 0.84 -0.31 0.30
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## -0.67 -0.85 0.22 0.49 0.53 1.00 1.01 0.58
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## -0.65 1.16 0.19 0.71 0.05 0.85 1.74 1.22
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.68 -0.03 -0.12 -0.71 -0.86 0.20 0.66
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.59 0.79 0.38 0.49 0.21 -0.23 -1.13 0.81
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.82 -0.44 0.00 -0.07 -0.62 -0.27 0.64 0.72
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## -0.09 -1.71 1.05 -0.38 -0.13 0.09 1.57 0.56
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.28 0.24 -0.52 0.42 -0.48 -0.60 2.51 -1.35
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.30 0.49 -0.86 0.15 -0.43
## b->o h1->o h2->o h3->o
## 1.62 1.15 0.95 -5.08
# Variable-importance plot for the node-1 TDA-KDE network, showing up to 50
# features (title typo "Assited" corrected to "Assisted")
vip(Adult_TDA_KDE_5.40.5_n1_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n1_NN1Fit TDA-Assisted NN")

# Score the test data with the node-1 TDA-KDE network fitted above
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the test-set labels
ad_tda_kde_5.40.5_n1_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6801 1046
## >50K 615 1306
##
## Accuracy : 0.83
## 95% CI : (0.8224, 0.8374)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5039
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9171
## Specificity : 0.5553
## Pos Pred Value : 0.8667
## Neg Pred Value : 0.6799
## Prevalence : 0.7592
## Detection Rate : 0.6963
## Detection Prevalence : 0.8033
## Balanced Accuracy : 0.7362
##
## 'Positive' Class : <=50K
##
# Prints the same confusion matrix a second time (identical to the print above)
ad_tda_kde_5.40.5_n1_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6801 1046
## >50K 615 1306
##
## Accuracy : 0.83
## 95% CI : (0.8224, 0.8374)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5039
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9171
## Specificity : 0.5553
## Pos Pred Value : 0.8667
## Neg Pred Value : 0.6799
## Prevalence : 0.7592
## Detection Rate : 0.6963
## Detection Prevalence : 0.8033
## Balanced Accuracy : 0.7362
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix (accuracy, kappa, CI, p-values)
ad_tda_kde_5.40.5_n1_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.299550e-01 5.038680e-01 8.223557e-01 8.373569e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 5.507872e-65 5.038922e-26
# Store the overall test-set accuracy (named element "Accuracy"), then show
# the per-class statistics
ad_tda_kde_5.40.5_n1_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_nn1_cf0[["overall"]]["Accuracy"]
ad_tda_kde_5.40.5_n1_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9170712 0.5552721 0.8667006
## Neg Pred Value Precision Recall
## 0.6798542 0.8667006 0.9170712
## F1 Prevalence Detection Rate
## 0.8911747 0.7592138 0.6962531
## Detection Prevalence Balanced Accuracy
## 0.8033374 0.7361717
# Keep precision, recall and F1 (elements 5 to 7 of byClass)
ad_tda_kde_5.40.5_n1_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold difference: fold-wise accuracy of ad_nn1_fit_re minus that of the
### node-1 TDA-KDE NN1 fit
diff_tda_kde_5.40.5_nn1_n1_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n1_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n1_3_fold
## Accuracy
## 1 -0.007383106
## 2 -0.062541473
## 3 -0.039036115
## Bayesian tests on the 3-fold differences
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE limits
bst_tda_kde_5.40.5_nn1.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n1_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# A zero probRight makes the ratio Inf, or NaN when probLeft is zero as well.
bst_tda_kde_5.40.5_nn1.n1_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nn1.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n1_3_fold[["probRight"]]
bst_tda_kde_5.40.5_nn1.n1_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the 3-fold differences (same -0.01/0.01 bounds)
bsr_tda_kde_5.40.5_nn1.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n1_3_fold
## $winLeft
## [1] 0.8559333
##
## $winRope
## [1] 0.1440667
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the 3-fold differences.
# Second argument is 0.1 (presumably the correlation setting - confirm against
# the helper's definition); the last two are the -0.01/0.01 bounds.
bct_tda_kde_5.40.5_nn1.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n1_3_fold
## $left
## [1] 0.8550607
##
## $rope
## [1] 0.08056161
##
## $right
## [1] 0.06437773
# ROPE plot of the 3-fold accuracy differences, using the interval
# [-0.01, 0.01] as the region of practical equivalence
plot(
  rope(diff_tda_kde_5.40.5_nn1_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (currently disabled)
#bf_tda_kde_5.40.5_nn1.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold))
#bf_tda_kde_5.40.5_nn1.n1_3_fold
# One-sample t-test of the fold-wise accuracy differences against zero
t.test(x = as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nn1_n1_3_fold)
## t = -2.2728, df = 2, p-value = 0.151
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.10507941 0.03243895
## sample estimates:
## mean of x
## -0.03632023
### Test-set difference in overall accuracy:
### nn1_cf_ov_acc minus the TDA-KDE node-1 NN1 accuracy
diff_tda_kde_5.40.5_nn1.n1_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n1_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n1_test
## Accuracy
## -0.0707412
## Bayesian tests on the test-set difference (a single value)
# Bayesian sign test; -0.01 and 0.01 are presumably the ROPE limits
bst_tda_kde_5.40.5_nn1.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n1_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test.
# A zero probRight makes the ratio Inf, or NaN when probLeft is zero as well.
bst_tda_kde_5.40.5_nn1.n1_test_odds.left <-
  bst_tda_kde_5.40.5_nn1.n1_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n1_test[["probRight"]]
bst_tda_kde_5.40.5_nn1.n1_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the test-set difference (same -0.01/0.01 bounds)
bsr_tda_kde_5.40.5_nn1.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n1_test
## $winLeft
## [1] 0.8425667
##
## $winRope
## [1] 0.1574333
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the test-set difference.
# The input is a single value here and the printed result is NA for
# left/rope/right.
bct_tda_kde_5.40.5_nn1.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n1_test))
#BayesFactor
#bf_tda_kde_5.40.5_nn1.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n1_test)) #bf_tda_kde_5.40.5_nn1.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n1_test))
## Node2
# Neural Network 1: tune an nnet classifier using the resampling scheme in
# `fitControl` and the size/decay grid in `nn1Grid`, selecting by Accuracy.
# NOTE(review): this model is named `n2` (Node2) but is trained on
# `tda.m_kde_adult_5.40.5.n3.vec` - confirm whether the node-3 data is
# intended here or whether this is a copy-paste slip for the node-2 data.
# NOTE(review): `Importance` does not appear to be an nnet() argument
# (presumably carried over from a randomForest call) - confirm it is needed.
Adult_TDA_KDE_5.40.5_n2_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4097.830708
## iter 10 value 3948.916991
## iter 20 value 3911.070706
## iter 30 value 3837.520903
## iter 40 value 3651.529123
## iter 50 value 3493.270197
## iter 60 value 3020.884325
## iter 70 value 2750.306695
## iter 80 value 2551.028000
## iter 90 value 2427.162803
## iter 100 value 2389.082745
## final value 2389.082745
## stopped after 100 iterations
## # weights: 331
## initial value 4135.488880
## iter 10 value 4077.492751
## iter 20 value 3831.965470
## iter 30 value 3802.253140
## iter 40 value 3777.017727
## iter 50 value 3752.019065
## iter 60 value 3721.047382
## iter 70 value 3653.039398
## iter 80 value 3379.751399
## iter 90 value 3079.506368
## iter 100 value 3020.946373
## final value 3020.946373
## stopped after 100 iterations
## # weights: 551
## initial value 5779.892868
## iter 10 value 4093.294607
## iter 20 value 3843.931714
## iter 30 value 3840.658862
## iter 40 value 3829.811906
## iter 50 value 3827.741759
## iter 60 value 3825.918507
## iter 70 value 3820.496191
## iter 80 value 3815.937913
## iter 90 value 3813.846782
## iter 100 value 3744.969171
## final value 3744.969171
## stopped after 100 iterations
## # weights: 771
## initial value 8655.760644
## iter 10 value 3958.591297
## iter 20 value 3874.435852
## iter 30 value 3827.194502
## iter 40 value 3751.197180
## iter 50 value 3740.888503
## iter 60 value 3688.263634
## iter 70 value 3512.522351
## iter 80 value 3252.888673
## iter 90 value 2833.683879
## iter 100 value 2669.724424
## final value 2669.724424
## stopped after 100 iterations
## # weights: 221
## initial value 5008.990779
## iter 10 value 4127.279180
## iter 20 value 3839.804898
## iter 30 value 3826.307166
## iter 40 value 3790.535146
## iter 50 value 3756.678074
## iter 60 value 3721.744364
## iter 70 value 3703.875977
## iter 80 value 3685.819457
## iter 90 value 3656.545183
## iter 100 value 3626.824690
## final value 3626.824690
## stopped after 100 iterations
## # weights: 331
## initial value 4307.654355
## iter 10 value 3860.097053
## iter 20 value 3766.828282
## iter 30 value 3743.510094
## iter 40 value 3716.063012
## iter 50 value 3646.683052
## iter 60 value 3318.734314
## iter 70 value 3018.990811
## iter 80 value 2996.082525
## iter 90 value 2973.224457
## iter 100 value 2893.815044
## final value 2893.815044
## stopped after 100 iterations
## # weights: 551
## initial value 6623.848535
## iter 10 value 4136.963406
## iter 20 value 4124.314199
## iter 30 value 3842.427752
## iter 40 value 3833.440330
## iter 50 value 3821.285457
## iter 60 value 3719.074120
## iter 70 value 3685.634806
## iter 80 value 3670.525694
## iter 90 value 3384.486137
## iter 100 value 3238.673927
## final value 3238.673927
## stopped after 100 iterations
## # weights: 771
## initial value 4866.833137
## iter 10 value 3883.723609
## iter 20 value 3802.900266
## iter 30 value 3787.906163
## iter 40 value 3747.340044
## iter 50 value 3662.515736
## iter 60 value 3499.218934
## iter 70 value 3289.115831
## iter 80 value 3065.258115
## iter 90 value 2732.421160
## iter 100 value 2644.155857
## final value 2644.155857
## stopped after 100 iterations
## # weights: 221
## initial value 4161.539722
## iter 10 value 4127.937946
## iter 20 value 4127.387514
## iter 30 value 4127.381168
## iter 30 value 4127.381142
## iter 30 value 4127.381141
## final value 4127.381141
## converged
## # weights: 331
## initial value 5221.673854
## iter 10 value 4095.334447
## iter 20 value 3920.640384
## iter 30 value 3818.721113
## iter 40 value 3758.945479
## iter 50 value 3744.650909
## iter 60 value 3721.408241
## iter 70 value 3708.425390
## iter 80 value 3686.665094
## iter 90 value 3660.129716
## iter 100 value 3622.332371
## final value 3622.332371
## stopped after 100 iterations
## # weights: 551
## initial value 4798.417284
## iter 10 value 4127.364356
## iter 20 value 4107.535772
## iter 30 value 3798.852041
## iter 40 value 3755.340710
## iter 50 value 3747.436866
## iter 60 value 3737.124818
## iter 70 value 3724.678954
## iter 80 value 3695.657003
## iter 90 value 3671.668726
## iter 100 value 3661.548834
## final value 3661.548834
## stopped after 100 iterations
## # weights: 771
## initial value 4471.146024
## iter 10 value 4087.123118
## iter 20 value 4016.740038
## iter 30 value 3871.463900
## iter 40 value 3810.506802
## iter 50 value 3481.725769
## iter 60 value 2897.050691
## iter 70 value 2648.348134
## iter 80 value 2523.414511
## iter 90 value 2475.772362
## iter 100 value 2458.607925
## final value 2458.607925
## stopped after 100 iterations
## # weights: 221
## initial value 4729.978143
## iter 10 value 4127.173506
## iter 20 value 4127.105448
## iter 30 value 3831.432406
## iter 40 value 3782.757251
## iter 50 value 3758.499207
## iter 60 value 3745.524654
## iter 70 value 3735.965902
## iter 80 value 3704.799841
## iter 90 value 3690.080877
## iter 100 value 3681.467573
## final value 3681.467573
## stopped after 100 iterations
## # weights: 331
## initial value 4146.413469
## iter 10 value 4057.262271
## iter 20 value 4015.888510
## iter 30 value 3817.918604
## iter 40 value 3810.693499
## iter 50 value 3796.575072
## iter 60 value 3738.250103
## iter 70 value 3527.979528
## iter 80 value 3265.339854
## iter 90 value 3228.620395
## iter 100 value 2864.817106
## final value 2864.817106
## stopped after 100 iterations
## # weights: 551
## initial value 4170.888061
## iter 10 value 4081.638239
## iter 20 value 3880.354890
## iter 30 value 3859.617503
## iter 40 value 3791.333100
## iter 50 value 3774.208455
## iter 60 value 3771.264026
## iter 70 value 3769.252482
## iter 80 value 3767.834930
## iter 90 value 3712.270035
## iter 100 value 3690.535499
## final value 3690.535499
## stopped after 100 iterations
## # weights: 771
## initial value 6478.485390
## iter 10 value 3981.340729
## iter 20 value 3816.017624
## iter 30 value 3799.387855
## iter 40 value 3789.365738
## iter 50 value 3770.555324
## iter 60 value 3648.973532
## iter 70 value 3250.831510
## iter 80 value 3129.325279
## iter 90 value 3027.819990
## iter 100 value 2792.980296
## final value 2792.980296
## stopped after 100 iterations
## # weights: 221
## initial value 4147.016568
## iter 10 value 4127.788633
## iter 20 value 4127.302652
## final value 4127.297049
## converged
## # weights: 331
## initial value 5629.769012
## iter 10 value 4142.134573
## iter 20 value 3968.430398
## iter 30 value 3893.574272
## iter 40 value 3882.649946
## iter 50 value 3881.362667
## iter 60 value 3879.943896
## iter 70 value 3877.796927
## iter 80 value 3877.714313
## final value 3877.712890
## converged
## # weights: 551
## initial value 4623.268172
## iter 10 value 4110.162848
## iter 20 value 3934.016543
## iter 30 value 3870.809322
## iter 40 value 3857.632345
## iter 50 value 3833.260124
## iter 60 value 3798.813442
## iter 70 value 3766.936901
## iter 80 value 3745.270527
## iter 90 value 3729.815287
## iter 100 value 3715.007129
## final value 3715.007129
## stopped after 100 iterations
## # weights: 771
## initial value 4234.835165
## iter 10 value 4092.150126
## iter 20 value 3888.059747
## iter 30 value 3879.113975
## iter 40 value 3877.542481
## iter 50 value 3867.143364
## iter 60 value 3858.880436
## iter 70 value 3812.884812
## iter 80 value 3772.465379
## iter 90 value 3767.074739
## iter 100 value 3764.897226
## final value 3764.897226
## stopped after 100 iterations
## # weights: 221
## initial value 4802.384345
## iter 10 value 4063.370971
## iter 20 value 3990.010054
## iter 30 value 3989.251747
## iter 40 value 3898.737474
## iter 50 value 3800.583245
## iter 60 value 3785.329074
## iter 70 value 3776.769515
## iter 80 value 3776.596903
## iter 80 value 3776.596866
## iter 80 value 3776.596861
## final value 3776.596861
## converged
## # weights: 331
## initial value 6049.068771
## iter 10 value 4127.942948
## iter 20 value 4009.893877
## iter 30 value 3823.766722
## iter 40 value 3782.772137
## iter 50 value 3731.227438
## iter 60 value 3052.165745
## iter 70 value 2916.953868
## iter 80 value 2815.470498
## iter 90 value 2620.222775
## iter 100 value 2514.690329
## final value 2514.690329
## stopped after 100 iterations
## # weights: 551
## initial value 7931.965953
## iter 10 value 4043.732767
## iter 20 value 3816.223704
## iter 30 value 3807.068558
## iter 40 value 3793.634842
## iter 50 value 3786.069180
## iter 60 value 3781.796263
## iter 70 value 3772.409089
## iter 80 value 3759.080109
## iter 90 value 3717.552937
## iter 100 value 3690.224248
## final value 3690.224248
## stopped after 100 iterations
## # weights: 771
## initial value 4554.181499
## iter 10 value 4099.177507
## iter 20 value 4084.949321
## iter 30 value 3907.654399
## iter 40 value 3894.191615
## iter 50 value 3864.876048
## iter 60 value 3855.984864
## iter 70 value 3764.050679
## iter 80 value 3726.755665
## iter 90 value 3719.175046
## iter 100 value 3674.523924
## final value 3674.523924
## stopped after 100 iterations
## # weights: 221
## initial value 4491.224364
## iter 10 value 4097.861333
## iter 20 value 3755.654244
## iter 30 value 3750.306834
## iter 40 value 3744.652686
## iter 50 value 3711.212061
## iter 60 value 3575.099234
## iter 70 value 3121.971275
## iter 80 value 2872.104248
## iter 90 value 2787.910622
## iter 100 value 2764.287025
## final value 2764.287025
## stopped after 100 iterations
## # weights: 331
## initial value 7328.927693
## iter 10 value 4126.441466
## iter 20 value 3902.586962
## iter 30 value 3837.907013
## iter 40 value 3837.850674
## iter 50 value 3823.539237
## iter 60 value 3797.585765
## iter 70 value 3757.475874
## iter 80 value 3500.985892
## iter 90 value 3089.967250
## iter 100 value 2909.383887
## final value 2909.383887
## stopped after 100 iterations
## # weights: 551
## initial value 6156.949871
## iter 10 value 3966.138126
## iter 20 value 3907.377039
## iter 30 value 3765.908590
## iter 40 value 3707.496077
## iter 50 value 3648.096139
## iter 60 value 3615.076122
## iter 70 value 3581.723169
## iter 80 value 3238.768713
## iter 90 value 3072.668300
## iter 100 value 3033.286533
## final value 3033.286533
## stopped after 100 iterations
## # weights: 771
## initial value 4949.154681
## iter 10 value 3854.836778
## iter 20 value 3851.335917
## iter 30 value 3764.776338
## iter 40 value 3737.065252
## iter 50 value 3710.404421
## iter 60 value 3625.612965
## iter 70 value 3280.231298
## iter 80 value 2749.454021
## iter 90 value 2722.957190
## iter 100 value 2699.799230
## final value 2699.799230
## stopped after 100 iterations
## # weights: 221
## initial value 4295.359579
## iter 10 value 3925.069773
## iter 20 value 3825.114273
## iter 30 value 3797.539089
## iter 40 value 3732.515858
## iter 50 value 3609.111721
## iter 60 value 3420.992481
## iter 70 value 3049.563439
## iter 80 value 2725.689952
## iter 90 value 2650.596772
## iter 100 value 2607.810189
## final value 2607.810189
## stopped after 100 iterations
## # weights: 331
## initial value 5950.405546
## iter 10 value 4126.586661
## iter 20 value 4094.422619
## iter 30 value 3840.773642
## iter 40 value 3832.742687
## iter 50 value 3832.002081
## iter 60 value 3831.420317
## iter 70 value 3830.800340
## iter 80 value 3830.436191
## iter 90 value 3830.422366
## final value 3830.420321
## converged
## # weights: 551
## initial value 4836.904855
## iter 10 value 4111.481400
## iter 20 value 4070.875533
## iter 30 value 3850.849468
## iter 40 value 3832.858802
## iter 50 value 3826.881805
## iter 60 value 3815.986794
## iter 70 value 3793.570464
## iter 80 value 3745.296817
## iter 90 value 3652.796276
## iter 100 value 3627.201098
## final value 3627.201098
## stopped after 100 iterations
## # weights: 771
## initial value 4379.124380
## iter 10 value 4099.114087
## iter 20 value 3856.860997
## iter 30 value 3828.296311
## iter 40 value 3788.489318
## iter 50 value 3716.887428
## iter 60 value 3711.094545
## iter 70 value 3691.975024
## iter 80 value 3677.055422
## iter 90 value 3676.112732
## iter 100 value 3665.539608
## final value 3665.539608
## stopped after 100 iterations
## # weights: 221
## initial value 5557.546348
## iter 10 value 4102.208036
## iter 20 value 4093.977174
## iter 30 value 3835.871339
## iter 40 value 3788.198475
## iter 50 value 3732.013184
## iter 60 value 3681.676960
## iter 70 value 3658.840740
## iter 80 value 3652.611771
## iter 90 value 3627.436038
## iter 100 value 3560.783294
## final value 3560.783294
## stopped after 100 iterations
## # weights: 331
## initial value 4262.197874
## iter 10 value 4059.300752
## iter 20 value 3850.244005
## iter 30 value 3814.809816
## iter 40 value 3785.051632
## iter 50 value 3761.311384
## iter 60 value 3757.664670
## iter 70 value 3738.491713
## iter 80 value 3720.472360
## iter 90 value 3703.915130
## iter 100 value 3669.679958
## final value 3669.679958
## stopped after 100 iterations
## # weights: 551
## initial value 4438.611376
## iter 10 value 4101.508492
## iter 20 value 3894.257648
## iter 30 value 3849.477201
## iter 40 value 3838.129742
## iter 50 value 3769.867926
## iter 60 value 3743.777309
## iter 70 value 3726.246964
## iter 80 value 3494.449504
## iter 90 value 3243.380584
## iter 100 value 2676.119908
## final value 2676.119908
## stopped after 100 iterations
## # weights: 771
## initial value 4359.057615
## iter 10 value 3873.233825
## iter 20 value 3831.957303
## iter 30 value 3830.922685
## iter 40 value 3821.111122
## iter 50 value 3793.533805
## iter 60 value 3557.486726
## iter 70 value 3342.609575
## iter 80 value 2996.732542
## iter 90 value 2905.759064
## iter 100 value 2744.220898
## final value 2744.220898
## stopped after 100 iterations
## # weights: 771
## initial value 7445.709362
## iter 10 value 6190.527801
## final value 6190.524988
## converged
# Print the n2 neural-network fit: the output that follows lists the
# resampling setup and the accuracy/kappa for every (size, decay) pair tried.
Adult_TDA_KDE_5.40.5_n2_NN1Fit0
## Neural Network
##
## 10351 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6901, 6901, 6900
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8046560 0.4825058
## 2 0.5 0.7658142 0.2882833
## 2 0.7 0.7475593 0.1818195
## 3 0.3 0.7803086 0.3405386
## 3 0.5 0.7750970 0.3246273
## 3 0.7 0.7856273 0.3713239
## 5 0.3 0.7634046 0.3048563
## 5 0.5 0.7561595 0.2305120
## 5 0.7 0.7833994 0.3738575
## 7 0.3 0.8073625 0.5451341
## 7 0.5 0.7883319 0.3828229
## 7 0.7 0.8042700 0.4681658
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.3.
# Per-fold accuracy and kappa of the selected model (one row per CV fold).
Adult_TDA_KDE_5.40.5_n2_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.7971014 0.5384148 Fold2
## 2 0.8272464 0.5692171 Fold1
## 3 0.7977398 0.5277704 Fold3
# Keep only the Accuracy column (as a one-column data frame) for the
# fold-wise comparison against the other classifier.
# NOTE(review): rows stay in the order printed above (Fold2, Fold1, Fold3);
# confirm the object this is later subtracted from uses the same fold order.
ad_tda_kde_5.40.5_n2_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n2_NN1Fit0[["resample"]]["Accuracy"]
# Summarise the fit; the output lists the weights of the final network.
summary(Adult_TDA_KDE_5.40.5_n2_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.00 0.00 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 0.00 0.00 0.00 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 0.00 0.00 0.00
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## 0.00 0.00 0.00 0.00 0.00
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 0.00 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.00 0.00 0.00 -0.01 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## -0.23 -0.23 0.00 0.00 0.00 -0.23 -0.23 0.00
# Variable-importance plot for the n2 network, showing up to 50 features.
# NOTE(review): the title string misspells "Assisted"; left unchanged here.
vip(Adult_TDA_KDE_5.40.5_n2_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n2_NN1Fit TDA-Assited NN")

# Class predictions from the training-data fit for the rows of the test set.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate the predictions against the observed test labels to assess
# performance on the test data, then print the result.
ad_tda_kde_5.40.5_n2_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Print the same confusion matrix a second time; the object has not been
# modified since the previous print, so the output is identical.
ad_tda_kde_5.40.5_n2_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics of the test-set confusion matrix.
ad_tda_kde_5.40.5_n2_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Store the test-set accuracy (first entry of `overall`) as a named value.
ad_tda_kde_5.40.5_n2_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics; the positive class is ' <=50K' (see the matrix print).
ad_tda_kde_5.40.5_n2_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Store precision, recall and F1 (entries 5 to 7 of `byClass`).
ad_tda_kde_5.40.5_n2_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Element-wise difference of the two per-fold accuracy tables; a positive
# entry means `ad_nn1_fit_re` scored higher than the n2 TDA-assisted fit.
# NOTE(review): rows are subtracted by position -- confirm both operands list
# the folds in the same order.
diff_tda_kde_5.40.5_nn1_n2_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n2_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n2_3_fold
## Accuracy
## 1 0.05283932
## 2 -0.03427454
## 3 0.01931733
## Bayesian tests on the 3-fold differences
# Bayesian sign test. The second and third arguments (-0.01, 0.01) are
# presumably the lower and upper ROPE bounds -- the result has left/rope/right
# probabilities.
bst_tda_kde_5.40.5_nn1.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n2_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.5
# Sign-test odds of "left" versus "right": probLeft divided by probRight.
bst_tda_kde_5.40.5_nn1.n2_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_nn1.n2_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nn1.n2_3_fold_odds.left
## [1] 0.5
# Bayesian signed-rank test on the same fold differences, called with the
# same two bounds (-0.01, 0.01) as the sign test.
bsr_tda_kde_5.40.5_nn1.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n2_3_fold
## $winLeft
## [1] 0.1336667
##
## $winRope
## [1] 0.3726
##
## $winRight
## [1] 0.4937333
# Bayesian correlated t-test on the fold differences.
# NOTE(review): the second argument (0.1) is presumably the assumed
# correlation between folds and the last two the ROPE bounds -- confirm
# against the definition of correlatedBayesianTtest.
bct_tda_kde_5.40.5_nn1.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n2_3_fold
## $left
## [1] 0.2603248
##
## $rope
## [1] 0.2080283
##
## $right
## [1] 0.5316468
# ROPE plot of the fold differences, using the range [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.40.5_nn1_n2_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_nn1.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold))
#bf_tda_kde_5.40.5_nn1.n2_3_fold
# One-sample t-test of the fold differences (default null: mean equal to 0).
t.test(x = as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nn1_n2_3_fold)
## t = 0.49775, df = 2, p-value = 0.668
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.09652703 0.12178177
## sample estimates:
## mean of x
## 0.01262737
### Test-set accuracy difference
# Single value: `nn1_cf_ov_acc` minus the test accuracy of the n2
# TDA-assisted network.
diff_tda_kde_5.40.5_nn1.n2_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n2_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n2_test
## Accuracy
## 0
## Bayesian tests on the test-set difference
# Bayesian sign test, called with the same bounds (-0.01, 0.01) as for the
# fold differences; the input here is a single difference.
bst_tda_kde_5.40.5_nn1.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Sign-test odds of "left" versus "right": probLeft divided by probRight.
# Both probabilities are 0 in the run shown above, so this is 0 / 0 = NaN.
bst_tda_kde_5.40.5_nn1.n2_test_odds.left <- with(
  bst_tda_kde_5.40.5_nn1.n2_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nn1.n2_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the single test-set difference, with the same
# two bounds (-0.01, 0.01).
bsr_tda_kde_5.40.5_nn1.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): the output below is NA for left/rope/right, presumably because
# only one difference is supplied -- confirm whether this test is meaningful
# here.
bct_tda_kde_5.40.5_nn1.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n2_test,c(-0.01,0.01)))
#BayesFactor
#bf_tda_kde_5.40.5_nn1.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n2_test))
#bf_tda_kde_5.40.5_nn1.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n2_test))
## Node 3
# Neural Network 1: fit an nnet classifier for `adult_df1` on all other
# columns of the n3 data set, using the resampling scheme in `fitControl`,
# the candidate parameters in `nn1Grid`, and accuracy as the selection metric.
# nnet prints its optimisation trace for every fit (the output that follows).
Adult_TDA_KDE_5.40.5_n3_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  # Spelled out as TRUE: `T` is an ordinary variable that can be reassigned.
  # NOTE(review): `Importance` does not look like a train()/nnet() argument
  # and appears to be silently ignored -- confirm and drop if unused.
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 4284.963875
## iter 10 value 4127.171279
## final value 4127.170805
## converged
## # weights: 331
## initial value 4524.370765
## iter 10 value 4098.857804
## iter 20 value 3867.528592
## iter 30 value 3787.424364
## iter 40 value 3708.899162
## iter 50 value 3681.789135
## iter 60 value 3547.279000
## iter 70 value 3005.076305
## iter 80 value 2901.935226
## iter 90 value 2635.381343
## iter 100 value 2478.673740
## final value 2478.673740
## stopped after 100 iterations
## # weights: 551
## initial value 4294.354995
## iter 10 value 3874.087330
## iter 20 value 3846.713162
## iter 30 value 3834.765428
## iter 40 value 3828.873553
## iter 50 value 3819.859015
## iter 60 value 3792.897364
## iter 70 value 3774.315636
## iter 80 value 3693.523612
## iter 90 value 3365.249146
## iter 100 value 2808.301249
## final value 2808.301249
## stopped after 100 iterations
## # weights: 771
## initial value 4572.009160
## iter 10 value 4123.842555
## iter 20 value 3869.310587
## iter 30 value 3804.440678
## iter 40 value 3762.472270
## iter 50 value 3758.172584
## iter 60 value 3720.406899
## iter 70 value 3718.412605
## iter 80 value 3693.444249
## iter 90 value 3673.376105
## iter 100 value 3635.147299
## final value 3635.147299
## stopped after 100 iterations
## # weights: 221
## initial value 4113.022918
## iter 10 value 3848.056877
## iter 20 value 3840.598819
## iter 30 value 3836.066823
## iter 40 value 3795.535036
## iter 50 value 3783.111060
## iter 60 value 3767.737887
## iter 70 value 3745.457228
## iter 80 value 3711.578681
## iter 90 value 3673.846608
## iter 100 value 3516.220359
## final value 3516.220359
## stopped after 100 iterations
## # weights: 331
## initial value 4350.720645
## iter 10 value 4094.103130
## iter 20 value 3926.643631
## iter 30 value 3925.374523
## iter 40 value 3906.240120
## iter 50 value 3849.655987
## iter 60 value 3832.518092
## iter 70 value 3785.533403
## iter 80 value 3507.244949
## iter 90 value 2975.819917
## iter 100 value 2799.453444
## final value 2799.453444
## stopped after 100 iterations
## # weights: 551
## initial value 6938.510951
## iter 10 value 4090.348767
## iter 20 value 3925.838714
## iter 30 value 3835.095293
## iter 40 value 3676.987123
## iter 50 value 3400.109678
## iter 60 value 3020.959504
## iter 70 value 2889.059047
## iter 80 value 2673.226083
## iter 90 value 2545.915454
## iter 100 value 2498.194141
## final value 2498.194141
## stopped after 100 iterations
## # weights: 771
## initial value 6162.111056
## iter 10 value 4026.539784
## iter 20 value 3879.263743
## iter 30 value 3779.802918
## iter 40 value 3761.242528
## iter 50 value 3749.893292
## iter 60 value 3561.180666
## iter 70 value 3255.175547
## iter 80 value 2848.295985
## iter 90 value 2685.853163
## iter 100 value 2663.797795
## final value 2663.797795
## stopped after 100 iterations
## # weights: 221
## initial value 4695.935024
## iter 10 value 4110.765981
## iter 20 value 3963.522646
## iter 30 value 3952.749159
## iter 40 value 3894.851720
## iter 50 value 3880.373644
## iter 60 value 3861.812484
## iter 70 value 3861.645184
## iter 80 value 3808.394874
## iter 90 value 3765.226962
## iter 100 value 3737.168828
## final value 3737.168828
## stopped after 100 iterations
## # weights: 331
## initial value 4227.999486
## iter 10 value 4127.482106
## iter 20 value 3914.390968
## iter 30 value 3859.910375
## iter 40 value 3768.350957
## iter 50 value 3755.341888
## iter 60 value 3717.729016
## iter 70 value 3703.943711
## iter 80 value 3690.983658
## iter 90 value 3684.704483
## iter 100 value 3676.404111
## final value 3676.404111
## stopped after 100 iterations
## # weights: 551
## initial value 4323.901844
## iter 10 value 4132.288525
## iter 20 value 4030.461842
## iter 30 value 3868.158921
## iter 40 value 3829.320389
## iter 50 value 3803.540615
## iter 60 value 3359.265011
## iter 70 value 2732.573018
## iter 80 value 2687.051141
## iter 90 value 2537.984469
## iter 100 value 2449.083739
## final value 2449.083739
## stopped after 100 iterations
## # weights: 771
## initial value 5318.794661
## iter 10 value 3892.179134
## iter 20 value 3829.431937
## iter 30 value 3781.001717
## iter 40 value 3758.943034
## iter 50 value 3750.745640
## iter 60 value 3745.957505
## iter 70 value 3725.500656
## iter 80 value 3680.274071
## iter 90 value 3670.272373
## iter 100 value 3665.606503
## final value 3665.606503
## stopped after 100 iterations
## # weights: 221
## initial value 5202.800275
## iter 10 value 4047.425161
## iter 20 value 3854.477378
## iter 30 value 3852.022530
## iter 40 value 3851.402976
## iter 40 value 3851.402941
## iter 50 value 3850.364328
## iter 60 value 3850.347352
## iter 70 value 3837.272104
## iter 80 value 3739.301836
## iter 90 value 3675.466125
## iter 100 value 3626.760281
## final value 3626.760281
## stopped after 100 iterations
## # weights: 331
## initial value 5580.663131
## iter 10 value 4127.000096
## iter 20 value 3878.122661
## iter 30 value 3839.358817
## iter 40 value 3766.240922
## iter 50 value 3732.734724
## iter 60 value 3688.598306
## iter 70 value 3632.106040
## iter 80 value 3568.298940
## iter 90 value 3562.772470
## iter 100 value 3473.217577
## final value 3473.217577
## stopped after 100 iterations
## # weights: 551
## initial value 4406.001934
## iter 10 value 4093.804802
## iter 20 value 3890.810320
## iter 30 value 3873.069513
## iter 40 value 3853.617247
## iter 50 value 3852.960106
## iter 50 value 3852.960094
## iter 60 value 3849.514413
## iter 70 value 3763.601897
## iter 80 value 3759.724088
## iter 90 value 3758.210317
## iter 100 value 3741.905631
## final value 3741.905631
## stopped after 100 iterations
## # weights: 771
## initial value 4238.568334
## iter 10 value 3941.257057
## iter 20 value 3867.850595
## iter 30 value 3861.631353
## iter 40 value 3848.475616
## iter 50 value 3843.855029
## iter 60 value 3840.778150
## iter 70 value 3775.611123
## iter 80 value 3741.293767
## iter 90 value 3720.502355
## iter 100 value 3704.268451
## final value 3704.268451
## stopped after 100 iterations
## # weights: 221
## initial value 4149.442533
## iter 10 value 4023.068192
## iter 20 value 3794.712880
## iter 30 value 3742.239143
## iter 40 value 3722.376086
## iter 50 value 3718.252005
## iter 60 value 3699.894223
## iter 70 value 3684.029733
## iter 80 value 3654.770737
## iter 90 value 3599.013302
## iter 100 value 3205.634089
## final value 3205.634089
## stopped after 100 iterations
## # weights: 331
## initial value 5853.357015
## iter 10 value 4126.940624
## iter 20 value 3983.410001
## iter 30 value 3823.835566
## iter 40 value 3803.488634
## iter 50 value 3772.297147
## iter 60 value 3769.949787
## iter 70 value 3767.490100
## iter 80 value 3766.545343
## iter 90 value 3766.130859
## iter 100 value 3765.692043
## final value 3765.692043
## stopped after 100 iterations
## # weights: 551
## initial value 5501.392123
## iter 10 value 3986.849730
## iter 20 value 3851.949155
## iter 30 value 3809.592306
## iter 40 value 3767.763868
## iter 50 value 3757.711060
## iter 60 value 3744.438509
## iter 70 value 3726.783441
## iter 80 value 3669.591159
## iter 90 value 3523.077368
## iter 100 value 3447.765363
## final value 3447.765363
## stopped after 100 iterations
## # weights: 771
## initial value 4315.642672
## iter 10 value 4098.630220
## iter 20 value 4095.485797
## iter 30 value 3823.705345
## iter 40 value 3757.079152
## iter 50 value 3749.321834
## iter 60 value 3712.967373
## iter 70 value 3591.146058
## iter 80 value 3486.291769
## iter 90 value 3152.364398
## iter 100 value 2978.220547
## final value 2978.220547
## stopped after 100 iterations
## # weights: 221
## initial value 4920.702395
## iter 10 value 4089.027364
## iter 20 value 3880.531376
## iter 30 value 3808.984480
## iter 40 value 3711.326485
## iter 50 value 3677.916574
## iter 60 value 3573.468188
## iter 70 value 3326.116119
## iter 80 value 3165.755182
## iter 90 value 3021.158202
## iter 100 value 2822.979030
## final value 2822.979030
## stopped after 100 iterations
## # weights: 331
## initial value 4272.888430
## iter 10 value 3895.353038
## iter 20 value 3813.637797
## iter 30 value 3778.568556
## iter 40 value 3758.857236
## iter 50 value 3736.489930
## iter 60 value 3722.652363
## iter 70 value 3694.370844
## iter 80 value 3633.894809
## iter 90 value 3453.474565
## iter 100 value 2909.602849
## final value 2909.602849
## stopped after 100 iterations
## # weights: 551
## initial value 6154.246132
## iter 10 value 4128.446705
## iter 20 value 4126.904870
## iter 30 value 4121.483228
## iter 40 value 4052.098661
## iter 50 value 3864.526061
## iter 60 value 3851.788141
## iter 70 value 3819.674344
## iter 80 value 3779.223130
## iter 90 value 3761.463938
## iter 100 value 3645.183824
## final value 3645.183824
## stopped after 100 iterations
## # weights: 771
## initial value 8663.008833
## iter 10 value 4099.203777
## iter 20 value 3811.540984
## iter 30 value 3771.878962
## iter 40 value 3541.768057
## iter 50 value 3289.864329
## iter 60 value 2912.533544
## iter 70 value 2601.208106
## iter 80 value 2565.665042
## iter 90 value 2551.590029
## iter 100 value 2495.214685
## final value 2495.214685
## stopped after 100 iterations
## # weights: 221
## initial value 4965.137410
## iter 10 value 4127.283651
## iter 20 value 3845.386550
## iter 30 value 3800.529844
## iter 40 value 3796.491706
## iter 50 value 3787.779756
## iter 60 value 3769.172549
## iter 70 value 3741.947414
## iter 80 value 3733.434110
## iter 90 value 3718.522029
## iter 100 value 3703.984975
## final value 3703.984975
## stopped after 100 iterations
## # weights: 331
## initial value 5805.953376
## iter 10 value 4125.312795
## iter 20 value 3994.256242
## iter 30 value 3784.273013
## iter 40 value 3773.739414
## iter 50 value 3723.993945
## iter 60 value 3624.082097
## iter 70 value 2989.123574
## iter 80 value 2720.554871
## iter 90 value 2570.510767
## iter 100 value 2412.110592
## final value 2412.110592
## stopped after 100 iterations
## # weights: 551
## initial value 5077.329769
## iter 10 value 4127.139574
## final value 4127.137274
## converged
## # weights: 771
## initial value 4222.130083
## iter 10 value 3868.341544
## iter 20 value 3835.755599
## iter 30 value 3832.681920
## iter 40 value 3832.075712
## iter 50 value 3811.838846
## iter 60 value 3747.902815
## iter 70 value 3708.906488
## iter 80 value 3507.557008
## iter 90 value 3113.771536
## iter 100 value 2998.867254
## final value 2998.867254
## stopped after 100 iterations
## # weights: 221
## initial value 4596.505391
## iter 10 value 4114.142239
## iter 20 value 4100.978618
## iter 30 value 3825.983353
## iter 40 value 3788.502085
## iter 50 value 3727.715333
## iter 60 value 3698.055999
## iter 70 value 3664.226434
## iter 80 value 3655.982689
## iter 90 value 3606.221814
## iter 100 value 3510.205156
## final value 3510.205156
## stopped after 100 iterations
## # weights: 331
## initial value 4283.856703
## iter 10 value 4100.910182
## iter 20 value 3873.487416
## iter 30 value 3871.357206
## iter 40 value 3870.893123
## iter 50 value 3813.568735
## iter 60 value 3725.229402
## iter 70 value 3674.093889
## iter 80 value 3664.532453
## iter 90 value 3500.516458
## iter 100 value 3129.353137
## final value 3129.353137
## stopped after 100 iterations
## # weights: 551
## initial value 4181.766311
## iter 10 value 4128.560904
## iter 20 value 4127.311556
## iter 30 value 4127.297152
## final value 4127.297057
## converged
## # weights: 771
## initial value 4301.684690
## iter 10 value 4047.754001
## iter 20 value 3858.900901
## iter 30 value 3855.938766
## iter 40 value 3847.513740
## iter 50 value 3843.654602
## iter 60 value 3758.511256
## iter 70 value 3745.988378
## iter 80 value 3730.897838
## iter 90 value 3700.306316
## iter 100 value 3688.076635
## final value 3688.076635
## stopped after 100 iterations
## # weights: 221
## initial value 4247.270749
## iter 10 value 4050.397137
## iter 20 value 3838.755899
## iter 30 value 3837.620818
## final value 3837.076827
## converged
## # weights: 331
## initial value 5339.979424
## iter 10 value 4127.048943
## iter 20 value 4126.218238
## iter 30 value 3917.853054
## iter 40 value 3806.296101
## iter 50 value 3784.460481
## iter 60 value 3748.319620
## iter 70 value 3740.299518
## iter 80 value 3737.486576
## iter 90 value 3734.712865
## iter 100 value 3718.296732
## final value 3718.296732
## stopped after 100 iterations
## # weights: 551
## initial value 5519.487886
## iter 10 value 4097.049493
## iter 20 value 3860.228894
## iter 30 value 3841.875110
## iter 40 value 3824.159833
## iter 50 value 3822.988143
## iter 60 value 3822.874562
## iter 70 value 3805.451554
## iter 80 value 3729.572345
## iter 90 value 3709.366330
## iter 100 value 3690.409458
## final value 3690.409458
## stopped after 100 iterations
## # weights: 771
## initial value 7655.826742
## iter 10 value 4105.427999
## iter 20 value 3860.911668
## iter 30 value 3841.403289
## iter 40 value 3766.192253
## iter 50 value 3717.641773
## iter 60 value 3635.186838
## iter 70 value 3437.761668
## iter 80 value 2992.388826
## iter 90 value 2842.432785
## iter 100 value 2740.019011
## final value 2740.019011
## stopped after 100 iterations
## # weights: 331
## initial value 6949.098549
## iter 10 value 5881.219428
## iter 20 value 5801.594500
## iter 30 value 5761.688110
## iter 40 value 5697.122456
## iter 50 value 5558.872204
## iter 60 value 5456.406202
## iter 70 value 5296.305274
## iter 80 value 5056.530139
## iter 90 value 4471.404179
## iter 100 value 3978.123817
## final value 3978.123817
## stopped after 100 iterations
# Print the caret fit for the n3 neural network: resampling results per (size, decay) pair and the selected values.
Adult_TDA_KDE_5.40.5_n3_NN1Fit0
## Neural Network
##
## 10351 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6901, 6900, 6901
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.7491046 0.1925832
## 2 0.5 0.7763478 0.3647208
## 2 0.7 0.7770220 0.3497923
## 3 0.3 0.8063984 0.4840893
## 3 0.5 0.7860137 0.4246793
## 3 0.7 0.7706494 0.3054891
## 5 0.3 0.7581874 0.2636821
## 5 0.5 0.7679447 0.2795026
## 5 0.7 0.7821492 0.3636602
## 7 0.3 0.7626316 0.3356978
## 7 0.5 0.7938351 0.4532893
## 7 0.7 0.8005960 0.4742076
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.3.
# Per-fold Accuracy and Kappa stored on the fitted caret object
Adult_TDA_KDE_5.40.5_n3_NN1Fit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.8133333 0.5609343 Fold3
## 2 0.7765865 0.3114555 Fold2
## 3 0.8292754 0.5798781 Fold1
# Keep only the Accuracy column (still a one-column data frame) for the fold-wise comparison below
ad_tda_kde_5.40.5_n3_nn1_fit_re <- Adult_TDA_KDE_5.40.5_n3_NN1Fit0[["resample"]]["Accuracy"]
# Print the architecture and weights of the selected network
summary(Adult_TDA_KDE_5.40.5_n3_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.3
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## -1.46 0.11 -0.58 0.92 -0.13 0.00 0.22 -0.14
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -1.32 -0.38 -0.05 0.00 0.00 0.00 -0.99 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 -0.11 -0.42 1.18 0.00 -1.85
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 1.89 0.00 0.00 -1.16 -0.30 -0.67 0.18 1.51
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.25 -1.38 -0.75 -0.10 -0.58 0.14 -0.02 0.27
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## 0.96 -1.36 -0.96 -1.12 -1.55 -0.09 0.96 0.82
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.91 0.73 -0.58 -0.70 0.12 -0.58 -2.02 -1.03
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 2.75 -0.31 0.14 -0.49 -0.43 -0.36 -1.00 -0.46
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## -0.09 0.00 0.03 -0.57 0.14 0.09 -0.22 -0.09
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.11 -0.04 -0.13 -0.03 0.04 0.05 -0.02 -0.14
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## -0.03 0.02 0.00 -0.01 -0.01 -0.07 -0.06 -0.04
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.03 -0.04 -0.03 0.13 -0.03 -0.41 0.00 -0.01
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.07 0.42 -0.08 -0.01 -0.27 -0.05 -0.17 -0.10
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## -0.01 0.00 0.42 -0.12 -0.06
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -0.08 -0.96 -0.08 0.10 -0.27 0.00 0.63 -0.01
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.51 0.05 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.03 0.04 -0.08 0.00 -0.17
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.25 0.00 0.00 -0.15 0.21 -0.24 -0.01 -0.08
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.03 0.20 -0.01 0.03 -0.08 0.11 0.00 -0.11
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.51 -0.21 -0.06 -0.05 -0.11 0.00 -0.15 -0.04
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.03 0.04 0.02 -0.13 0.10 -0.06 0.06 -0.13
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.08 -0.01 -0.09 -0.22 0.01 0.24 -0.07 -0.01
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.31 -0.32 0.04 0.00 -0.02 0.00 0.00
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## 0.01 0.02 0.00 -0.02 -0.02 -0.02 0.05 0.01
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.00 0.00 0.00 0.00 0.00 0.00 -0.02 0.00
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.00 -0.01 -0.01 -0.05 0.00 0.01 0.00 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.00 -0.03 0.03 0.02 0.02 -0.02 0.00 0.00
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## 0.00 -0.01 -0.07 0.02 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.06 0.01 -0.21 -0.02 -0.32 -0.01 0.10 -0.09
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.24 0.29 0.09 0.00 0.00 0.00 0.20 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 -0.09 -0.17 0.46 0.00 -0.21
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## -0.29 0.00 0.00 0.16 0.27 0.17 -0.02 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## -0.01 -0.34 0.27 -0.02 -0.22 -0.09 0.00 0.53
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## -0.09 0.59 0.14 -0.21 0.08 0.04 0.31 -0.28
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.03 -0.28 -0.51 -0.54 0.48 0.17 -0.10 -0.34
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.40 0.02 0.17 -0.11 0.06 -0.08 0.24 -0.18
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.02 0.02 0.16 -0.03 -0.02 0.03 0.02
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## -0.03 0.02 0.02 0.05 -0.02 -0.02 -0.07 0.02
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.02 0.01 0.02
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 -0.07 0.02 -0.02 0.00 0.04 -0.02 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.01 -0.08 0.01 -0.02 0.06 0.00 0.02 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 -0.27 0.18 0.01
## b->o h1->o h2->o h3->o
## 2.33 3.79 0.94 -6.02
# Variable-importance plot for the n3 network, limited to 50 features
vip(Adult_TDA_KDE_5.40.5_n3_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n3_NN1Fit TDA-Assisted NN")

# Predict the outcome for the test data with the model fitted on the training data
pred0 <- predict(Adult_TDA_KDE_5.40.5_n3_NN1Fit0, newdata = adult.one_hot_df4Test)
# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.40.5_n3_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6417 1059
## >50K 999 1293
##
## Accuracy : 0.7893
## 95% CI : (0.7811, 0.7974)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 9.018e-13
##
## Kappa : 0.4187
##
## Mcnemar's Test P-Value : 0.1934
##
## Sensitivity : 0.8653
## Specificity : 0.5497
## Pos Pred Value : 0.8583
## Neg Pred Value : 0.5641
## Prevalence : 0.7592
## Detection Rate : 0.6569
## Detection Prevalence : 0.7654
## Balanced Accuracy : 0.7075
##
## 'Positive' Class : <=50K
##
# NOTE(review): prints the same confusion matrix a second time; the object has not changed since the previous print.
ad_tda_kde_5.40.5_n3_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6417 1059
## >50K 999 1293
##
## Accuracy : 0.7893
## 95% CI : (0.7811, 0.7974)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 9.018e-13
##
## Kappa : 0.4187
##
## Mcnemar's Test P-Value : 0.1934
##
## Sensitivity : 0.8653
## Specificity : 0.5497
## Pos Pred Value : 0.8583
## Neg Pred Value : 0.5641
## Prevalence : 0.7592
## Detection Rate : 0.6569
## Detection Prevalence : 0.7654
## Balanced Accuracy : 0.7075
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the confusion matrix
ad_tda_kde_5.40.5_n3_nn1_cf0[["overall"]]
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.893120e-01 4.186828e-01 7.810881e-01 7.973631e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 9.018235e-13 1.934102e-01
# Keep the overall accuracy (a named length-one vector) for the test-set comparison
ad_tda_kde_5.40.5_n3_nn1_cf0_ov_acc <- ad_tda_kde_5.40.5_n3_nn1_cf0[["overall"]]["Accuracy"]
# Per-class statistics from the confusion matrix
ad_tda_kde_5.40.5_n3_nn1_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.8652913 0.5497449 0.8583467
## Neg Pred Value Precision Recall
## 0.5641361 0.8583467 0.8652913
## F1 Prevalence Detection Rate
## 0.8618050 0.7592138 0.6569410
## Detection Prevalence Balanced Accuracy
## 0.7653563 0.7075181
# Keep precision, recall and F1 (elements 5 to 7 of byClass)
ad_tda_kde_5.40.5_n3_nn1_cf0_pre_rec_f1 <- ad_tda_kde_5.40.5_n3_nn1_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_nn1_fit_re (presumably the non-TDA NN) minus the TDA-assisted n3 NN.
# NOTE(review): rows are subtracted by position; confirm both resample tables list the folds in the same order.
diff_tda_kde_5.40.5_nn1_n3_3_fold <- ad_nn1_fit_re - ad_tda_kde_5.40.5_n3_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n3_3_fold
## Accuracy
## 1 0.03660743
## 2 0.01638534
## 3 -0.01221824
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise differences; -0.01 and 0.01 are presumably the ROPE bounds
bst_tda_kde_5.40.5_nn1.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n3_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.5
# Odds of "left" against "right" from the Bayesian sign test
bst_tda_kde_5.40.5_nn1.n3_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_nn1.n3_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nn1.n3_3_fold_odds.left
## [1] 0.5
# Bayesian signed-rank test on the same fold-wise differences, same bounds
bsr_tda_kde_5.40.5_nn1.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n3_3_fold
## $winLeft
## [1] 0.08056667
##
## $winRope
## [1] 0.2424667
##
## $winRight
## [1] 0.6769667
# Correlated Bayesian t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the correlation term of the test; a common
# heuristic sets it to the test fraction of the resampling scheme (1/3 for 3-fold CV) — confirm 0.1 is intended.
bct_tda_kde_5.40.5_nn1.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n3_3_fold
## $left
## [1] 0.1429672
##
## $rope
## [1] 0.2803128
##
## $right
## [1] 0.57672
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_nn1_n3_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold))
#bf_tda_kde_5.40.5_nn1.n3_3_fold
# One-sample t-test on the same fold-wise differences
t.test(
  as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nn1_n3_3_fold)
## t = 0.95959, df = 2, p-value = 0.4385
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.04735046 0.07453348
## sample estimates:
## mean of x
## 0.01359151
### Test set diff
# Difference in test-set accuracy: nn1_cf_ov_acc minus the TDA-assisted n3 NN (a single value)
diff_tda_kde_5.40.5_nn1.n3_test <- nn1_cf_ov_acc - ad_tda_kde_5.40.5_n3_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n3_test
## Accuracy
## -0.03009828
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set difference; -0.01 and 0.01 are presumably the ROPE bounds
bst_tda_kde_5.40.5_nn1.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n3_test
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds of "left" against "right" from the Bayesian sign test.
# probRight is 0 for this input, so the ratio evaluates to Inf.
bst_tda_kde_5.40.5_nn1.n3_test_odds.left <- with(
  bst_tda_kde_5.40.5_nn1.n3_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_nn1.n3_test_odds.left
## [1] Inf
# Bayesian signed-rank test on the single test-set difference, same bounds
bsr_tda_kde_5.40.5_nn1.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n3_test
## $winLeft
## [1] 0.8431
##
## $winRope
## [1] 0.1569
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the test-set difference.
# NOTE(review): the input is a single value and the printed result is NA for left, rope and right;
# this test presumably needs more than one observation — confirm whether the call should be kept.
bct_tda_kde_5.40.5_nn1.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n3_test))
#BayesFactor
#bf_tda_kde_5.40.5_nn1.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n3_test))
#bf_tda_kde_5.40.5_nn1.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n3_test))
##Node4
#Neural Network 1
# Tune and fit an nnet classifier with caret on the n4 data, using the shared
# resampling control (fitControl) and tuning grid (nn1Grid); the model is selected on Accuracy.
# NOTE(review): `Importance` is not a documented argument of train() or nnet() and is presumably
# passed through and ignored — confirm. Spelled TRUE rather than T because T can be reassigned.
Adult_TDA_KDE_5.40.5_n4_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  Importance = TRUE,
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 6532.964301
## iter 10 value 2805.881588
## iter 20 value 2703.185477
## iter 30 value 2552.622009
## iter 40 value 2290.647844
## iter 50 value 2147.218792
## iter 60 value 2124.117632
## iter 70 value 2084.992182
## iter 80 value 1739.556185
## iter 90 value 1675.863666
## iter 100 value 1643.989968
## final value 1643.989968
## stopped after 100 iterations
## # weights: 331
## initial value 4023.815010
## iter 10 value 2876.361109
## iter 20 value 2810.086908
## iter 30 value 2809.873229
## iter 40 value 2743.131163
## iter 50 value 2737.872235
## iter 60 value 2703.971046
## iter 70 value 2537.772828
## iter 80 value 2498.965736
## iter 90 value 2457.520583
## iter 100 value 2357.392411
## final value 2357.392411
## stopped after 100 iterations
## # weights: 551
## initial value 3068.833032
## iter 10 value 2926.491576
## iter 20 value 2817.992087
## iter 30 value 2716.534591
## iter 40 value 2711.834055
## iter 50 value 2706.008801
## iter 60 value 2652.029328
## iter 70 value 2642.422282
## iter 80 value 2638.941754
## iter 90 value 2625.735792
## iter 100 value 2551.406967
## final value 2551.406967
## stopped after 100 iterations
## # weights: 771
## initial value 3991.587074
## iter 10 value 2923.271181
## iter 20 value 2772.191798
## iter 30 value 2741.592051
## iter 40 value 2733.623021
## iter 50 value 2697.572746
## iter 60 value 2668.422038
## iter 70 value 2564.888047
## iter 80 value 2538.141864
## iter 90 value 2444.348867
## iter 100 value 2247.630005
## final value 2247.630005
## stopped after 100 iterations
## # weights: 221
## initial value 3467.934860
## iter 10 value 2924.813070
## iter 20 value 2899.272947
## iter 30 value 2898.625741
## iter 30 value 2898.625734
## iter 40 value 2895.778152
## iter 50 value 2834.937279
## iter 60 value 2749.925467
## iter 70 value 2596.938753
## iter 80 value 2282.917064
## iter 90 value 2092.401815
## iter 100 value 1964.821872
## final value 1964.821872
## stopped after 100 iterations
## # weights: 331
## initial value 3160.003171
## iter 10 value 2926.767964
## iter 20 value 2926.003093
## iter 30 value 2725.103440
## iter 40 value 2684.623734
## iter 50 value 2671.889828
## iter 60 value 2642.782898
## iter 70 value 2611.531914
## iter 80 value 2591.246457
## iter 90 value 2360.987528
## iter 100 value 2175.744076
## final value 2175.744076
## stopped after 100 iterations
## # weights: 551
## initial value 4490.226373
## iter 10 value 2941.718312
## iter 20 value 2941.684870
## iter 30 value 2941.579352
## iter 40 value 2771.271664
## iter 50 value 2767.536558
## iter 60 value 2757.481162
## iter 70 value 2754.927016
## iter 80 value 2753.674761
## iter 90 value 2742.193336
## final value 2733.204184
## converged
## # weights: 771
## initial value 5626.314344
## iter 10 value 2797.766563
## iter 20 value 2753.662710
## iter 30 value 2736.469589
## iter 40 value 2728.263802
## iter 50 value 2715.968716
## iter 60 value 2705.452948
## iter 70 value 2701.114964
## iter 80 value 2699.279865
## iter 90 value 2697.259925
## iter 100 value 2687.959242
## final value 2687.959242
## stopped after 100 iterations
## # weights: 221
## initial value 4292.873784
## iter 10 value 2892.243441
## iter 20 value 2839.321422
## iter 30 value 2834.803905
## iter 40 value 2737.095326
## iter 50 value 2586.729322
## iter 60 value 2412.429338
## iter 70 value 2194.116588
## iter 80 value 2001.810612
## iter 90 value 1890.887861
## iter 100 value 1838.025779
## final value 1838.025779
## stopped after 100 iterations
## # weights: 331
## initial value 3658.265907
## iter 10 value 2929.367629
## iter 20 value 2753.478480
## iter 30 value 2719.093915
## iter 40 value 2679.800278
## iter 50 value 2675.092508
## iter 60 value 2671.921339
## iter 60 value 2671.921326
## final value 2671.911947
## converged
## # weights: 551
## initial value 3972.781419
## iter 10 value 2931.271567
## iter 20 value 2734.440426
## iter 30 value 2722.195104
## iter 40 value 2713.997901
## iter 50 value 2709.070407
## iter 60 value 2708.175113
## iter 70 value 2704.282712
## iter 80 value 2662.629499
## iter 90 value 2653.846772
## iter 100 value 2490.106090
## final value 2490.106090
## stopped after 100 iterations
## # weights: 771
## initial value 6277.153227
## iter 10 value 2947.340059
## iter 20 value 2843.559539
## iter 30 value 2324.406739
## iter 40 value 2043.989335
## iter 50 value 1841.743362
## iter 60 value 1731.962716
## iter 70 value 1718.626033
## iter 80 value 1709.047014
## iter 90 value 1705.273831
## iter 100 value 1704.738628
## final value 1704.738628
## stopped after 100 iterations
## # weights: 221
## initial value 3710.591361
## iter 10 value 2914.647417
## iter 20 value 2761.703044
## iter 30 value 2742.084912
## iter 40 value 2738.526729
## iter 50 value 2733.210459
## iter 60 value 2708.006080
## iter 70 value 2543.731213
## iter 80 value 2496.321884
## iter 90 value 2429.214709
## iter 100 value 2123.727564
## final value 2123.727564
## stopped after 100 iterations
## # weights: 331
## initial value 3013.248408
## iter 10 value 2765.792460
## iter 20 value 2720.375882
## iter 30 value 2695.401475
## iter 40 value 2653.588683
## iter 50 value 2603.079192
## iter 60 value 2588.789624
## iter 70 value 2583.762881
## iter 80 value 2552.074766
## iter 90 value 2295.182232
## iter 100 value 1895.173971
## final value 1895.173971
## stopped after 100 iterations
## # weights: 551
## initial value 7794.113473
## iter 10 value 3197.150737
## iter 20 value 2909.182277
## iter 30 value 2752.469953
## iter 40 value 2746.646982
## iter 50 value 2743.757349
## iter 60 value 2691.068531
## iter 70 value 2680.097901
## iter 80 value 2676.505460
## iter 90 value 2675.090978
## iter 100 value 2669.987823
## final value 2669.987823
## stopped after 100 iterations
## # weights: 771
## initial value 4369.541185
## iter 10 value 2920.277576
## iter 20 value 2920.225588
## iter 30 value 2765.071040
## iter 40 value 2735.821599
## iter 50 value 2733.225112
## iter 60 value 2717.311049
## iter 70 value 2683.968972
## iter 80 value 2676.933215
## iter 90 value 2675.911699
## iter 100 value 2671.515481
## final value 2671.515481
## stopped after 100 iterations
## # weights: 221
## initial value 5018.836023
## iter 10 value 2942.626098
## iter 20 value 2940.570165
## iter 30 value 2940.545769
## final value 2940.545492
## converged
## # weights: 331
## initial value 4983.187100
## iter 10 value 2937.848614
## iter 20 value 2931.472435
## iter 30 value 2923.290864
## iter 40 value 2922.358603
## iter 50 value 2843.928168
## iter 60 value 2751.179649
## iter 70 value 2715.096182
## iter 80 value 2709.532981
## iter 90 value 2677.924683
## iter 100 value 2653.949824
## final value 2653.949824
## stopped after 100 iterations
## # weights: 551
## initial value 5223.923265
## iter 10 value 2927.398446
## iter 20 value 2795.268037
## iter 30 value 2716.556940
## iter 40 value 2692.077107
## iter 50 value 2654.101050
## iter 60 value 2614.996443
## iter 70 value 2562.546301
## iter 80 value 2392.452623
## iter 90 value 2115.166437
## iter 100 value 1976.956488
## final value 1976.956488
## stopped after 100 iterations
## # weights: 771
## initial value 4029.823179
## iter 10 value 2750.927833
## iter 20 value 2723.998486
## iter 30 value 2715.893733
## iter 40 value 2701.742322
## iter 50 value 2680.343554
## iter 60 value 2652.183938
## iter 70 value 2291.482005
## iter 80 value 2151.250171
## iter 90 value 2023.392525
## iter 100 value 1860.774320
## final value 1860.774320
## stopped after 100 iterations
## # weights: 221
## initial value 5500.634692
## final value 2940.732281
## converged
## # weights: 331
## initial value 3474.845338
## iter 10 value 2940.815204
## iter 20 value 2761.223674
## iter 30 value 2744.154850
## iter 40 value 2741.964914
## iter 50 value 2736.821373
## iter 60 value 2710.399584
## iter 70 value 2705.434988
## iter 80 value 2703.434529
## iter 90 value 2682.373715
## iter 100 value 2626.216382
## final value 2626.216382
## stopped after 100 iterations
## # weights: 551
## initial value 3662.617453
## iter 10 value 2940.513747
## iter 20 value 2940.361691
## iter 30 value 2933.945455
## iter 40 value 2724.728493
## iter 50 value 2682.908586
## iter 60 value 2680.914904
## iter 70 value 2675.664920
## iter 80 value 2670.749711
## iter 90 value 2663.805713
## iter 100 value 2637.126587
## final value 2637.126587
## stopped after 100 iterations
## # weights: 771
## initial value 3018.652311
## iter 10 value 2925.482814
## iter 20 value 2878.899941
## iter 30 value 2691.559386
## iter 40 value 2644.407804
## iter 50 value 2350.974948
## iter 60 value 2145.019496
## iter 70 value 2018.620001
## iter 80 value 1920.574699
## iter 90 value 1865.929761
## iter 100 value 1835.621093
## final value 1835.621093
## stopped after 100 iterations
## # weights: 221
## initial value 3806.029802
## iter 10 value 2941.923399
## iter 20 value 2941.901001
## final value 2941.860794
## converged
## # weights: 331
## initial value 4826.840308
## iter 10 value 2860.964472
## iter 20 value 2753.692673
## iter 30 value 2750.718594
## final value 2750.714503
## converged
## # weights: 551
## initial value 5640.551877
## iter 10 value 2774.799889
## iter 20 value 2709.819983
## iter 30 value 2704.645047
## iter 40 value 2691.512304
## iter 50 value 2648.757490
## iter 60 value 2615.966101
## iter 70 value 2560.010005
## iter 80 value 1996.414639
## iter 90 value 1769.556591
## iter 100 value 1687.538304
## final value 1687.538304
## stopped after 100 iterations
## # weights: 771
## initial value 12693.449186
## iter 10 value 2988.427156
## iter 20 value 2939.090864
## iter 30 value 2752.473583
## iter 40 value 2733.693127
## iter 50 value 2725.702042
## iter 60 value 2684.639643
## iter 70 value 2627.949018
## iter 80 value 2361.287274
## iter 90 value 2076.744703
## iter 100 value 2000.684748
## final value 2000.684748
## stopped after 100 iterations
## # weights: 221
## initial value 3647.509272
## iter 10 value 2943.629215
## iter 20 value 2942.156532
## iter 30 value 2942.139221
## final value 2942.139030
## converged
## # weights: 331
## initial value 3292.369753
## iter 10 value 2840.100390
## iter 20 value 2695.801780
## iter 30 value 2590.605767
## iter 40 value 2029.784024
## iter 50 value 1917.086917
## iter 60 value 1863.745849
## iter 70 value 1764.161329
## iter 80 value 1720.177993
## iter 90 value 1719.347575
## iter 100 value 1699.916153
## final value 1699.916153
## stopped after 100 iterations
## # weights: 551
## initial value 5078.886991
## iter 10 value 2937.337866
## iter 20 value 2753.091517
## iter 30 value 2750.767045
## iter 40 value 2741.229374
## iter 50 value 2692.284691
## iter 60 value 2672.060026
## iter 70 value 2662.480175
## iter 80 value 2660.073665
## iter 90 value 2655.054986
## iter 100 value 2651.882535
## final value 2651.882535
## stopped after 100 iterations
## # weights: 771
## initial value 4485.928875
## iter 10 value 3038.202290
## iter 20 value 2783.850728
## iter 30 value 2782.228758
## iter 40 value 2780.474225
## iter 50 value 2757.622095
## iter 60 value 2713.762779
## iter 70 value 2627.651319
## iter 80 value 2098.826114
## iter 90 value 1893.981739
## iter 100 value 1771.684758
## final value 1771.684758
## stopped after 100 iterations
## # weights: 221
## initial value 3808.355360
## iter 10 value 2857.140804
## iter 20 value 2781.978761
## iter 30 value 2752.017874
## iter 40 value 2745.162472
## iter 50 value 2730.616655
## iter 60 value 2682.014932
## iter 70 value 2531.661446
## iter 80 value 2272.805931
## iter 90 value 2004.265642
## iter 100 value 1797.527195
## final value 1797.527195
## stopped after 100 iterations
## # weights: 331
## initial value 6378.533935
## iter 10 value 2809.075245
## iter 20 value 2806.997469
## iter 30 value 2747.112798
## iter 40 value 2745.087822
## iter 50 value 2738.993852
## iter 60 value 2683.225272
## iter 70 value 2516.343527
## iter 80 value 2436.482072
## iter 90 value 2414.810541
## iter 100 value 2410.513213
## final value 2410.513213
## stopped after 100 iterations
## # weights: 551
## initial value 3733.664642
## iter 10 value 2928.573538
## iter 20 value 2790.589060
## iter 30 value 2739.450658
## iter 40 value 2737.950984
## iter 50 value 2729.869124
## iter 50 value 2729.869116
## iter 60 value 2698.365900
## iter 70 value 2695.631084
## iter 80 value 2693.779655
## iter 90 value 2689.898157
## iter 100 value 2678.358111
## final value 2678.358111
## stopped after 100 iterations
## # weights: 771
## initial value 3055.694952
## iter 10 value 2943.953111
## iter 20 value 2767.535307
## iter 30 value 2761.711507
## iter 40 value 2730.402311
## iter 50 value 2719.556533
## iter 60 value 2717.563317
## iter 70 value 2716.163500
## iter 80 value 2708.372059
## iter 90 value 2707.759608
## iter 100 value 2706.416346
## final value 2706.416346
## stopped after 100 iterations
## # weights: 331
## initial value 5445.830102
## iter 10 value 4299.946166
## iter 20 value 4206.527572
## iter 30 value 4124.525237
## iter 40 value 4121.712924
## iter 50 value 4115.754227
## iter 60 value 4068.216961
## iter 70 value 4052.408805
## iter 80 value 4036.997484
## iter 90 value 3985.715896
## iter 100 value 3781.278838
## final value 3781.278838
## stopped after 100 iterations
# Print the caret tuning summary for the n4 fit: resampled Accuracy/Kappa for
# every size/decay combination and the combination finally selected.
Adult_TDA_KDE_5.40.5_n4_NN1Fit0
## Neural Network
##
## 8741 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5827, 5827, 5828
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8283913 0.2528047
## 2 0.5 0.8084874 0.1820410
## 2 0.7 0.8375488 0.3638547
## 3 0.3 0.8237040 0.3006314
## 3 0.5 0.8469303 0.4170242
## 3 0.7 0.8210731 0.2186062
## 5 0.3 0.8372074 0.3340522
## 5 0.5 0.8209591 0.2605409
## 5 0.7 0.8202726 0.2349527
## 7 0.3 0.8123797 0.1568151
## 7 0.5 0.8412098 0.4031067
## 7 0.7 0.8366298 0.4191392
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 3 and decay = 0.5.
# Show the per-fold Accuracy/Kappa table stored on the fitted object
# (one row per cross-validation fold).
Adult_TDA_KDE_5.40.5_n4_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8650875 0.5644840 Fold3
## 2 0.8239533 0.2059136 Fold2
## 3 0.8517502 0.4806750 Fold1
# Keep only the per-fold Accuracy column (still a one-column data frame);
# it feeds the fold-wise comparison against the baseline further down.
ad_tda_kde_5.40.5_n4_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n4_NN1Fit0$resample["Accuracy"]
# Dump the architecture and weights of the final network.
summary(Adult_TDA_KDE_5.40.5_n4_NN1Fit0)
## a 108-3-1 network with 331 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.59 -0.02 -0.14 0.21 0.07 -0.01 -0.08 0.13
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## 0.33 0.11 -0.02 0.00 0.00 0.00 0.70 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 -0.33 -0.27 -0.79 0.00 0.91
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.37 0.31 0.14 0.03 0.69
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.08 -0.04 -0.18 0.02 -0.16 0.47 0.00 0.04
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## -0.36 0.06 0.18 -0.18 0.25 0.17 -0.01 0.02
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## -0.01 -0.10 0.21 -1.81 0.10 0.17 -0.23 -0.18
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 2.54 0.32 0.01 0.08 0.15 0.03 0.27 0.32
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.01 -0.01 0.37 -0.08 -0.13 0.13 0.10
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.01 0.00 0.02 -0.03 0.32 -0.03 0.00 0.31
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.18 -0.03 0.00 -0.01 -0.01 0.00 -0.01 -0.01
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## 0.28 0.20 0.03 0.02 0.02 0.11 0.02 -0.01
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## -0.02 -0.34 -0.04 0.00 0.14 -0.03 -0.10 0.03
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## -0.02 -0.03 -0.76 -0.02 -0.01
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## -0.52 -0.15 -0.03 -0.52 -0.38 -0.01 0.92 -0.52
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.03 0.06 0.00 0.00 0.00 0.00 0.16 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 -0.28 0.11 2.06 0.00 -0.18
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 -2.38 0.42 -0.22 -0.03 -0.52
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.39 0.16 -0.10 -0.19 -0.05 0.28 -0.01 -0.32
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 0.22 0.03 0.02 -0.59 -0.70 -0.01 0.63 -0.39
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## -0.14 0.37 0.14 -0.87 0.66 0.18 -0.87 -0.02
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## 0.40 -0.18 0.02 0.15 -0.19 -0.32 -0.44 -0.08
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.00 0.00 0.35 0.00 0.11 0.10 0.02
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.26 -0.04 -0.01 0.08 -0.25 0.02 -0.02 0.00
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.01 0.05 0.00 0.02 0.00 -0.01 -0.01 -0.27
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## -0.04 -0.13 0.16 0.05 -0.01 0.21 0.08 0.00
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.03 -0.02 0.00 0.05 0.07 0.00 -0.06 -0.01
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## -0.06 -0.02 -0.71 0.03 0.00
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o
## 3.67 -9.26 2.08 3.40
# Variable-importance plot for the n4 TDA-assisted network, limited to the
# 50 highest-ranked features. The title typo ("Assited") is corrected.
vip(Adult_TDA_KDE_5.40.5_n4_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n4_NN1Fit TDA-Assisted NN")

# Score the test data with the n4 model fitted above.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predicted vs. observed labels and print the resulting
# performance statistics for the test data.
ad_tda_kde_5.40.5_n4_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6091 1553
## >50K 1325 799
##
## Accuracy : 0.7054
## 95% CI : (0.6962, 0.7144)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1666
##
## Mcnemar's Test P-Value : 2.323e-05
##
## Sensitivity : 0.8213
## Specificity : 0.3397
## Pos Pred Value : 0.7968
## Neg Pred Value : 0.3762
## Prevalence : 0.7592
## Detection Rate : 0.6236
## Detection Prevalence : 0.7826
## Balanced Accuracy : 0.5805
##
## 'Positive' Class : <=50K
##
# NOTE(review): this prints the same confusion-matrix object a second time;
# the output is identical to the print directly after it was created.
ad_tda_kde_5.40.5_n4_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6091 1553
## >50K 1325 799
##
## Accuracy : 0.7054
## 95% CI : (0.6962, 0.7144)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.1666
##
## Mcnemar's Test P-Value : 2.323e-05
##
## Sensitivity : 0.8213
## Specificity : 0.3397
## Pos Pred Value : 0.7968
## Neg Pred Value : 0.3762
## Prevalence : 0.7592
## Detection Rate : 0.6236
## Detection Prevalence : 0.7826
## Balanced Accuracy : 0.5805
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics as a named numeric vector
# (Accuracy, Kappa, accuracy CI bounds, null accuracy, p-values).
ad_tda_kde_5.40.5_n4_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.053645e-01 1.665557e-01 6.962124e-01 7.143939e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 2.322777e-05
# Store the test-set accuracy (named element "Accuracy") for the later
# comparison with the baseline model.
ad_tda_kde_5.40.5_n4_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_nn1_cf0$overall["Accuracy"]
# Per-class statistics (sensitivity, specificity, precision, recall, F1, ...).
ad_tda_kde_5.40.5_n4_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8213323 0.3397109 0.7968341
## Neg Pred Value Precision Recall
## 0.3761770 0.7968341 0.8213323
## F1 Prevalence Detection Rate
## 0.8088977 0.7592138 0.6235667
## Detection Prevalence Balanced Accuracy
## 0.7825553 0.5805216
# Keep precision, recall and F1 (elements 5 to 7 of byClass), selected by name.
ad_tda_kde_5.40.5_n4_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
# Fold-wise accuracy gap: ad_nn1_fit_re (presumably the non-TDA NN baseline)
# minus the n4 TDA-assisted NN.
# NOTE(review): the two data frames are subtracted row by row, and $resample
# does not list folds in a fixed order (Fold3, Fold2, Fold1 for n4) -- confirm
# that the rows of both operands refer to comparable folds.
diff_tda_kde_5.40.5_nn1_n4_3_fold <-
  ad_nn1_fit_re - ad_tda_kde_5.40.5_n4_nn1_fit_re
diff_tda_kde_5.40.5_nn1_n4_3_fold
## Accuracy
## 1 -0.01514677
## 2 -0.03098149
## 3 -0.03469305
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise differences; the last two arguments
# (-0.01, 0.01) are the same bounds used for the ROPE plot further down.
bst_tda_kde_5.40.5_nn1.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test: probLeft / probRight.
# In the recorded run probRight is 0, so the ratio evaluates to Inf.
bst_tda_kde_5.40.5_nn1.n4_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nn1.n4_3_fold$probLeft /
  bst_tda_kde_5.40.5_nn1.n4_3_fold$probRight
bst_tda_kde_5.40.5_nn1.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test on the same fold-wise differences, called with
# the same two bounds (-0.01, 0.01) as the sign test.
bsr_tda_kde_5.40.5_nn1.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n4_3_fold
## $winLeft
## [1] 0.9604
##
## $winRope
## [1] 0.0396
##
## $winRight
## [1] 0
# Bayesian correlated t-test on the fold-wise differences.
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# with 3-fold cross-validation the usual choice would be 1/3 -- confirm
# against the helper's definition before relying on these probabilities.
bct_tda_kde_5.40.5_nn1.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n4_3_fold
## $left
## [1] 0.9329398
##
## $rope
## [1] 0.05038429
##
## $right
## [1] 0.01667588
# ROPE plot of the fold-wise differences, using the same +/-0.01 region as
# the Bayesian tests above.
plot(
  rope(diff_tda_kde_5.40.5_nn1_n4_3_fold, range = c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold))
#bf_tda_kde_5.40.5_nn1.n4_3_fold
#t_test
# One-sample t-test of the three fold-wise accuracy differences against a
# true mean of 0 (two-sided; df = 2 in the output below).
t.test(as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nn1_n4_3_fold)
## t = -4.495, df = 2, p-value = 0.0461
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.052727876 -0.001153002
## sample estimates:
## mean of x
## -0.02694044
### Test set diff
# Test-set accuracy gap: nn1_cf_ov_acc minus the n4 TDA-assisted accuracy.
# The result is a single named number, not a vector of paired observations.
diff_tda_kde_5.40.5_nn1.n4_test <-
  nn1_cf_ov_acc - ad_tda_kde_5.40.5_n4_nn1_cf0_ov_acc
diff_tda_kde_5.40.5_nn1.n4_test
## Accuracy
## 0.0538493
## Bayesian Tests Test set diff
# Bayesian sign test on the single test-set difference, with the same
# (-0.01, 0.01) bounds as the fold-wise tests.
bst_tda_kde_5.40.5_nn1.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nn1.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" for the test-set sign test:
# probLeft / probRight.
bst_tda_kde_5.40.5_nn1.n4_test_odds.left <-
  bst_tda_kde_5.40.5_nn1.n4_test$probLeft /
  bst_tda_kde_5.40.5_nn1.n4_test$probRight
bst_tda_kde_5.40.5_nn1.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_kde_5.40.5_nn1.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1609
##
## $winRight
## [1] 0.8391
# Bayesian correlated t-test on the single test-set difference.
# NOTE(review): every component comes back NA in the recorded output --
# presumably because a spread cannot be estimated from one observation;
# confirm against the helper's definition.
bct_tda_kde_5.40.5_nn1.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n4_test))
#BayesFactor
#bf_tda_kde_5.40.5_nn1.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n4_test))
#bf_tda_kde_5.40.5_nn1.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n4_test))
##Node5
#Neural Network 1
# Tune a neural network (caret method "nnet") on the node-5 TDA/KDE subset.
# The size/decay candidates come from nn1Grid, resampling is controlled by
# fitControl, and the winner is chosen by resampled accuracy.
# NOTE(review): most fits in the training log below report "stopped after
# 100 iterations" and no `maxit` is passed here -- consider raising it.
# NOTE(review): `Importance` does not look like an nnet argument (possibly
# carried over from another model's call) -- confirm it has any effect.
Adult_TDA_KDE_5.40.5_n5_NN1Fit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n5.vec,
  Importance = TRUE, # TRUE instead of T: T is an ordinary, reassignable variable
  method = "nnet",
  trControl = fitControl,
  tuneGrid = nn1Grid,
  metric = "Accuracy"
)
## # weights: 221
## initial value 3269.045504
## iter 10 value 1865.868854
## iter 20 value 1748.506493
## iter 30 value 1748.227799
## iter 40 value 1748.223102
## iter 50 value 1745.550855
## iter 60 value 1744.884688
## iter 70 value 1744.877172
## iter 70 value 1744.877155
## iter 70 value 1744.877154
## final value 1744.877154
## converged
## # weights: 331
## initial value 2243.759558
## iter 10 value 1868.290781
## iter 20 value 1833.736695
## iter 30 value 1739.455336
## iter 40 value 1686.653440
## iter 50 value 1668.844883
## iter 60 value 1490.208414
## iter 70 value 1364.988054
## iter 80 value 1332.308674
## iter 90 value 1240.738551
## iter 100 value 1186.525957
## final value 1186.525957
## stopped after 100 iterations
## # weights: 551
## initial value 3348.268517
## iter 10 value 1797.378976
## iter 20 value 1726.991062
## iter 30 value 1717.968929
## iter 40 value 1695.253407
## iter 50 value 1634.358971
## iter 60 value 1482.890917
## iter 70 value 1331.279436
## iter 80 value 1249.906239
## iter 90 value 1192.493358
## iter 100 value 1187.776299
## final value 1187.776299
## stopped after 100 iterations
## # weights: 771
## initial value 4130.199564
## iter 10 value 1812.761157
## iter 20 value 1732.362258
## iter 30 value 1705.502334
## iter 40 value 1699.403379
## iter 50 value 1693.377021
## iter 60 value 1687.058395
## iter 70 value 1643.328630
## iter 80 value 1439.240129
## iter 90 value 1375.981892
## iter 100 value 1300.754436
## final value 1300.754436
## stopped after 100 iterations
## # weights: 221
## initial value 2598.619157
## iter 10 value 1795.530354
## iter 20 value 1746.005348
## iter 30 value 1741.461177
## iter 40 value 1722.471716
## iter 50 value 1718.710021
## final value 1718.631853
## converged
## # weights: 331
## initial value 2250.964052
## iter 10 value 1875.205628
## iter 20 value 1873.758303
## iter 30 value 1873.741285
## iter 40 value 1871.850098
## iter 50 value 1865.187778
## iter 60 value 1864.382789
## iter 70 value 1862.854722
## iter 80 value 1862.835817
## final value 1862.835589
## converged
## # weights: 551
## initial value 2743.992338
## iter 10 value 1874.969524
## iter 20 value 1873.632243
## iter 30 value 1860.157541
## iter 40 value 1771.970618
## iter 50 value 1434.061586
## iter 60 value 1290.072731
## iter 70 value 1229.095583
## iter 80 value 1211.446447
## iter 90 value 1206.484264
## iter 100 value 1197.587344
## final value 1197.587344
## stopped after 100 iterations
## # weights: 771
## initial value 2028.016159
## iter 10 value 1770.626290
## iter 20 value 1747.154452
## iter 30 value 1740.694723
## iter 40 value 1731.111406
## iter 50 value 1727.480722
## final value 1727.270400
## converged
## # weights: 221
## initial value 3259.637195
## iter 10 value 1875.741401
## iter 20 value 1875.142629
## iter 30 value 1857.543955
## iter 40 value 1819.913086
## iter 50 value 1737.424532
## iter 60 value 1623.925347
## iter 70 value 1496.856522
## iter 80 value 1295.842677
## iter 90 value 1238.483664
## iter 100 value 1216.790766
## final value 1216.790766
## stopped after 100 iterations
## # weights: 331
## initial value 3739.135424
## iter 10 value 1871.546885
## iter 20 value 1859.054030
## iter 30 value 1838.916942
## iter 40 value 1802.714705
## iter 50 value 1744.749700
## iter 60 value 1734.047589
## iter 70 value 1721.057050
## iter 80 value 1596.732165
## iter 90 value 1371.311685
## iter 100 value 1306.991847
## final value 1306.991847
## stopped after 100 iterations
## # weights: 551
## initial value 4298.113328
## iter 10 value 1871.806159
## iter 20 value 1779.657482
## iter 30 value 1758.814099
## iter 40 value 1748.309992
## iter 50 value 1733.212809
## iter 60 value 1731.177436
## iter 70 value 1659.047112
## iter 80 value 1286.084915
## iter 90 value 1245.231413
## iter 100 value 1238.392653
## final value 1238.392653
## stopped after 100 iterations
## # weights: 771
## initial value 5890.622781
## iter 10 value 1822.212125
## iter 20 value 1759.800615
## iter 30 value 1720.102319
## iter 40 value 1706.930726
## iter 50 value 1701.764034
## iter 60 value 1688.232465
## iter 70 value 1679.466458
## iter 80 value 1675.981088
## iter 90 value 1645.397074
## iter 100 value 1529.252298
## final value 1529.252298
## stopped after 100 iterations
## # weights: 221
## initial value 3055.015087
## iter 10 value 1838.017661
## iter 20 value 1794.081438
## iter 30 value 1736.542040
## iter 40 value 1455.357941
## iter 50 value 1400.902523
## iter 60 value 1334.404068
## iter 70 value 1287.109335
## iter 80 value 1278.687079
## iter 90 value 1276.717492
## iter 100 value 1276.591905
## final value 1276.591905
## stopped after 100 iterations
## # weights: 331
## initial value 2385.793124
## iter 10 value 1875.582107
## iter 10 value 1875.582094
## iter 10 value 1875.582086
## final value 1875.582086
## converged
## # weights: 551
## initial value 5850.110221
## iter 10 value 1874.678440
## iter 20 value 1814.428413
## iter 30 value 1745.708574
## iter 40 value 1745.130999
## final value 1745.114781
## converged
## # weights: 771
## initial value 2123.127137
## iter 10 value 1875.426686
## iter 20 value 1801.453127
## iter 30 value 1796.830270
## iter 40 value 1737.405281
## iter 50 value 1729.498712
## iter 60 value 1729.364429
## iter 70 value 1729.297323
## iter 80 value 1729.072717
## iter 90 value 1724.829705
## iter 100 value 1713.242477
## final value 1713.242477
## stopped after 100 iterations
## # weights: 221
## initial value 3737.998938
## iter 10 value 1876.701488
## iter 20 value 1875.644322
## iter 30 value 1870.991973
## iter 40 value 1799.546565
## iter 50 value 1640.271250
## iter 60 value 1370.969229
## iter 70 value 1311.082136
## iter 80 value 1257.960226
## iter 90 value 1227.701159
## iter 100 value 1217.382133
## final value 1217.382133
## stopped after 100 iterations
## # weights: 331
## initial value 4141.783964
## iter 10 value 1874.782654
## iter 20 value 1859.040235
## iter 30 value 1748.600803
## iter 40 value 1744.459306
## iter 50 value 1730.513907
## iter 60 value 1727.659630
## iter 70 value 1714.917214
## iter 80 value 1673.484060
## iter 90 value 1564.969678
## iter 100 value 1394.404850
## final value 1394.404850
## stopped after 100 iterations
## # weights: 551
## initial value 2923.984518
## iter 10 value 1855.392496
## iter 20 value 1759.523317
## iter 30 value 1747.633082
## iter 40 value 1735.128590
## iter 50 value 1688.037139
## iter 60 value 1665.664400
## iter 70 value 1640.805135
## iter 80 value 1607.676959
## iter 90 value 1467.203459
## iter 100 value 1379.074165
## final value 1379.074165
## stopped after 100 iterations
## # weights: 771
## initial value 2350.657732
## iter 10 value 1862.235244
## iter 20 value 1769.687148
## iter 30 value 1710.214712
## iter 40 value 1694.785660
## iter 50 value 1691.241994
## iter 60 value 1688.887357
## iter 70 value 1683.198812
## iter 80 value 1655.411847
## iter 90 value 1625.904567
## iter 100 value 1505.161656
## final value 1505.161656
## stopped after 100 iterations
## # weights: 221
## initial value 2920.528101
## iter 10 value 1876.177909
## final value 1876.177709
## converged
## # weights: 331
## initial value 4298.408146
## iter 10 value 1793.313824
## iter 20 value 1753.887937
## iter 30 value 1678.816965
## iter 40 value 1656.832797
## iter 50 value 1488.563439
## iter 60 value 1284.203653
## iter 70 value 1214.925360
## iter 80 value 1205.808706
## iter 90 value 1203.883828
## iter 100 value 1203.505261
## final value 1203.505261
## stopped after 100 iterations
## # weights: 551
## initial value 2615.648353
## iter 10 value 1876.154301
## iter 20 value 1875.559206
## iter 30 value 1875.552346
## iter 30 value 1875.552331
## iter 40 value 1866.940085
## iter 50 value 1752.021819
## iter 60 value 1731.067338
## iter 70 value 1594.616011
## iter 80 value 1406.827814
## iter 90 value 1304.355190
## iter 100 value 1261.271831
## final value 1261.271831
## stopped after 100 iterations
## # weights: 771
## initial value 2526.707922
## iter 10 value 1870.282796
## iter 20 value 1809.053463
## iter 30 value 1794.819332
## iter 40 value 1744.726459
## iter 50 value 1744.521012
## iter 60 value 1724.522629
## iter 70 value 1717.271247
## iter 80 value 1592.412646
## iter 90 value 1377.466252
## iter 100 value 1302.095088
## final value 1302.095088
## stopped after 100 iterations
## # weights: 221
## initial value 3411.987446
## iter 10 value 1875.401684
## iter 20 value 1787.284330
## iter 30 value 1737.280477
## iter 40 value 1732.440813
## iter 50 value 1719.679955
## iter 60 value 1645.971469
## iter 70 value 1419.729166
## iter 80 value 1286.962688
## iter 90 value 1280.364716
## iter 100 value 1274.862541
## final value 1274.862541
## stopped after 100 iterations
## # weights: 331
## initial value 5473.138785
## iter 10 value 1857.282443
## iter 20 value 1799.385210
## iter 30 value 1756.528963
## iter 40 value 1720.172435
## iter 50 value 1713.730880
## iter 60 value 1712.375751
## iter 70 value 1712.191889
## final value 1712.189910
## converged
## # weights: 551
## initial value 3045.207585
## iter 10 value 1843.118338
## iter 20 value 1782.831661
## iter 30 value 1720.522667
## iter 40 value 1703.279408
## iter 50 value 1691.043864
## iter 60 value 1653.303541
## iter 70 value 1524.235002
## iter 80 value 1348.065341
## iter 90 value 1306.962319
## iter 100 value 1260.980819
## final value 1260.980819
## stopped after 100 iterations
## # weights: 771
## initial value 3134.990177
## iter 10 value 1853.889383
## iter 20 value 1753.318649
## iter 30 value 1724.731230
## iter 40 value 1705.508087
## iter 50 value 1700.341314
## iter 60 value 1692.521053
## iter 70 value 1635.141845
## iter 80 value 1599.625093
## iter 90 value 1415.451545
## iter 100 value 1275.917941
## final value 1275.917941
## stopped after 100 iterations
## # weights: 221
## initial value 3007.381605
## iter 10 value 1865.729007
## iter 20 value 1769.460110
## iter 30 value 1751.740782
## iter 40 value 1749.977614
## iter 50 value 1749.917360
## iter 60 value 1749.574845
## iter 70 value 1739.608465
## iter 80 value 1730.283370
## iter 90 value 1713.112416
## iter 100 value 1633.364955
## final value 1633.364955
## stopped after 100 iterations
## # weights: 331
## initial value 4351.848140
## iter 10 value 1761.398296
## iter 20 value 1754.560599
## iter 30 value 1750.487661
## iter 40 value 1742.524128
## iter 50 value 1733.791062
## iter 60 value 1721.614087
## iter 70 value 1646.564206
## iter 80 value 1455.926248
## iter 90 value 1396.531999
## iter 100 value 1332.249301
## final value 1332.249301
## stopped after 100 iterations
## # weights: 551
## initial value 4170.975004
## iter 10 value 1875.507656
## iter 20 value 1851.336027
## iter 30 value 1798.841145
## iter 40 value 1716.410916
## iter 50 value 1655.619548
## iter 60 value 1624.863282
## iter 70 value 1566.559888
## iter 80 value 1475.547331
## iter 90 value 1445.598821
## iter 100 value 1388.984044
## final value 1388.984044
## stopped after 100 iterations
## # weights: 771
## initial value 3851.882032
## iter 10 value 1871.630239
## iter 20 value 1772.987321
## iter 30 value 1748.907661
## iter 40 value 1742.590802
## iter 50 value 1736.162007
## iter 60 value 1732.282288
## iter 70 value 1725.288089
## iter 80 value 1682.015459
## iter 90 value 1515.666506
## iter 100 value 1338.776015
## final value 1338.776015
## stopped after 100 iterations
## # weights: 221
## initial value 3862.960169
## iter 10 value 1877.802425
## iter 20 value 1874.089144
## iter 30 value 1785.170159
## iter 40 value 1772.003422
## iter 50 value 1753.787477
## iter 60 value 1733.985934
## iter 70 value 1707.743410
## iter 80 value 1659.426320
## iter 90 value 1472.693222
## iter 100 value 1411.873137
## final value 1411.873137
## stopped after 100 iterations
## # weights: 331
## initial value 4581.276154
## iter 10 value 1877.368451
## iter 20 value 1875.848392
## iter 30 value 1875.830559
## iter 40 value 1833.636591
## iter 50 value 1748.337728
## iter 60 value 1725.798760
## iter 70 value 1426.819577
## iter 80 value 1354.557104
## iter 90 value 1268.428450
## iter 100 value 1248.316145
## final value 1248.316145
## stopped after 100 iterations
## # weights: 551
## initial value 4679.845220
## iter 10 value 1870.861503
## iter 20 value 1836.597349
## iter 30 value 1644.342105
## iter 40 value 1460.704466
## iter 50 value 1397.504594
## iter 60 value 1281.445238
## iter 70 value 1256.235906
## iter 80 value 1249.814295
## iter 90 value 1238.793705
## iter 100 value 1235.245190
## final value 1235.245190
## stopped after 100 iterations
## # weights: 771
## initial value 3320.197159
## iter 10 value 1773.130778
## iter 20 value 1763.488968
## iter 30 value 1754.585907
## iter 40 value 1749.359950
## iter 50 value 1733.114290
## iter 60 value 1725.496987
## iter 70 value 1721.728428
## iter 80 value 1718.923890
## iter 90 value 1713.868960
## iter 100 value 1642.003196
## final value 1642.003196
## stopped after 100 iterations
## # weights: 771
## initial value 11811.625253
## iter 10 value 2765.188399
## iter 20 value 2708.237117
## iter 30 value 2329.291391
## iter 40 value 2054.155714
## iter 50 value 1901.765800
## iter 60 value 1833.600825
## iter 70 value 1803.001414
## iter 80 value 1777.190843
## iter 90 value 1765.780808
## iter 100 value 1752.217035
## final value 1752.217035
## stopped after 100 iterations
# Print the caret tuning summary for the n5 fit: resampled Accuracy/Kappa for
# every size/decay combination and the combination finally selected.
Adult_TDA_KDE_5.40.5_n5_NN1Fit0
## Neural Network
##
## 6628 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 4418, 4419, 4419
## Resampling results across tuning parameters:
##
## size decay Accuracy Kappa
## 2 0.3 0.8645142 0.2460836
## 2 0.5 0.8654194 0.2335553
## 2 0.7 0.8589316 0.1869908
## 3 0.3 0.8589318 0.1813695
## 3 0.5 0.8614983 0.1496912
## 3 0.7 0.8655730 0.3204874
## 5 0.3 0.8657225 0.3164292
## 5 0.5 0.8634583 0.2572368
## 5 0.7 0.8687402 0.3886870
## 7 0.3 0.8685881 0.2382796
## 7 0.5 0.8702480 0.3065192
## 7 0.7 0.8679849 0.2330469
##
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were size = 7 and decay = 0.5.
# Show the per-fold Accuracy/Kappa table stored on the fitted object
# (one row per cross-validation fold).
Adult_TDA_KDE_5.40.5_n5_NN1Fit0$resample
## Accuracy Kappa Resample
## 1 0.8605704 0.2623094 Fold2
## 2 0.8665158 0.2107468 Fold1
## 3 0.8836578 0.4465014 Fold3
# Keep only the per-fold Accuracy column (still a one-column data frame)
# of the n5 fit.
ad_tda_kde_5.40.5_n5_nn1_fit_re <-
  Adult_TDA_KDE_5.40.5_n5_NN1Fit0$resample["Accuracy"]
# Dump the architecture and weights of the final network.
summary(Adult_TDA_KDE_5.40.5_n5_NN1Fit0)
## a 108-7-1 network with 771 weights
## options were - entropy fitting decay=0.5
## b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1
## 0.08 -0.37 0.09 -0.15 0.73 0.00 -0.57 0.26
## i8->h1 i9->h1 i10->h1 i11->h1 i12->h1 i13->h1 i14->h1 i15->h1
## -0.14 -0.16 0.01 0.00 0.00 0.00 0.00 0.00
## i16->h1 i17->h1 i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
## 0.00 0.00 0.00 0.00 0.13 0.00 0.00 -0.18
## i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1 i30->h1 i31->h1
## 0.00 0.00 0.00 0.12 1.08 0.01 -0.19 0.30
## i32->h1 i33->h1 i34->h1 i35->h1 i36->h1 i37->h1 i38->h1 i39->h1
## -0.01 0.28 -0.30 -0.01 0.10 -0.45 0.01 0.29
## i40->h1 i41->h1 i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
## -0.01 -0.45 -0.13 0.27 -0.30 0.07 0.58 0.03
## i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1 i54->h1 i55->h1
## 0.20 0.04 -0.17 0.30 0.01 -0.09 -0.36 0.15
## i56->h1 i57->h1 i58->h1 i59->h1 i60->h1 i61->h1 i62->h1 i63->h1
## 0.07 0.10 -0.55 0.16 0.01 0.35 -0.04 0.12
## i64->h1 i65->h1 i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
## 0.00 0.00 0.01 0.05 0.00 0.01 -0.02 0.02
## i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1 i78->h1 i79->h1
## 0.00 0.02 -0.12 0.02 0.04 0.00 0.19 0.00
## i80->h1 i81->h1 i82->h1 i83->h1 i84->h1 i85->h1 i86->h1 i87->h1
## 0.02 0.00 0.00 0.00 0.01 0.01 0.01 0.01
## i88->h1 i89->h1 i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
## -0.09 0.03 -0.11 0.01 0.04 -0.07 0.00 0.00
## i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1 i102->h1 i103->h1
## 0.00 -0.17 0.03 0.00 0.03 0.00 -0.07 0.00
## i104->h1 i105->h1 i106->h1 i107->h1 i108->h1
## 0.00 0.00 0.23 -0.06 0.00
## b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2
## 0.24 -0.24 0.03 -0.30 -0.17 0.04 1.40 -0.20
## i8->h2 i9->h2 i10->h2 i11->h2 i12->h2 i13->h2 i14->h2 i15->h2
## -0.32 -0.26 0.01 0.00 0.00 0.00 0.00 0.00
## i16->h2 i17->h2 i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
## 0.00 0.00 0.00 0.00 -0.42 0.00 0.00 1.31
## i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2 i30->h2 i31->h2
## 0.00 0.00 0.00 -0.64 0.66 0.53 -0.74 -0.79
## i32->h2 i33->h2 i34->h2 i35->h2 i36->h2 i37->h2 i38->h2 i39->h2
## 0.14 0.19 0.86 0.06 0.08 0.53 0.01 -0.11
## i40->h2 i41->h2 i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
## 1.11 0.33 0.02 -0.21 -1.16 0.21 -0.85 -0.11
## i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2 i54->h2 i55->h2
## 0.57 0.53 -0.70 -0.01 -0.26 0.34 -0.38 1.53
## i56->h2 i57->h2 i58->h2 i59->h2 i60->h2 i61->h2 i62->h2 i63->h2
## -0.97 0.45 -0.56 0.97 0.16 -0.79 0.63 -0.39
## i64->h2 i65->h2 i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
## 0.00 0.01 -0.05 -0.21 0.01 0.11 -0.33 0.11
## i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2 i78->h2 i79->h2
## -0.02 0.09 -0.05 0.03 0.12 0.01 0.12 0.01
## i80->h2 i81->h2 i82->h2 i83->h2 i84->h2 i85->h2 i86->h2 i87->h2
## 0.07 0.02 0.00 0.02 0.05 0.01 0.01 -0.08
## i88->h2 i89->h2 i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
## 0.13 0.01 -0.12 0.03 0.10 -0.40 0.02 0.05
## i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2 i102->h2 i103->h2
## 0.03 0.09 -0.24 0.04 0.06 0.02 0.04 0.02
## i104->h2 i105->h2 i106->h2 i107->h2 i108->h2
## -0.02 0.00 0.36 -0.07 -0.03
## b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3
## -0.03 -0.44 0.04 -0.04 0.06 0.00 0.15 0.03
## i8->h3 i9->h3 i10->h3 i11->h3 i12->h3 i13->h3 i14->h3 i15->h3
## -0.29 0.03 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h3 i17->h3 i18->h3 i19->h3 i20->h3 i21->h3 i22->h3 i23->h3
## 0.00 0.00 0.00 0.00 0.35 0.00 0.00 -0.47
## i24->h3 i25->h3 i26->h3 i27->h3 i28->h3 i29->h3 i30->h3 i31->h3
## 0.00 0.00 0.00 0.09 0.52 -0.30 0.14 0.65
## i32->h3 i33->h3 i34->h3 i35->h3 i36->h3 i37->h3 i38->h3 i39->h3
## -0.02 -0.39 -0.10 -0.02 0.04 -0.28 0.00 0.33
## i40->h3 i41->h3 i42->h3 i43->h3 i44->h3 i45->h3 i46->h3 i47->h3
## -0.20 0.09 -0.06 0.06 0.08 -0.01 -0.13 0.15
## i48->h3 i49->h3 i50->h3 i51->h3 i52->h3 i53->h3 i54->h3 i55->h3
## 0.13 -0.15 -0.06 0.72 -0.24 0.02 -0.34 -0.22
## i56->h3 i57->h3 i58->h3 i59->h3 i60->h3 i61->h3 i62->h3 i63->h3
## 0.03 0.01 -0.17 -0.22 -0.04 0.40 -0.19 0.16
## i64->h3 i65->h3 i66->h3 i67->h3 i68->h3 i69->h3 i70->h3 i71->h3
## 0.00 0.00 0.28 0.02 -0.01 0.00 -0.06 -0.02
## i72->h3 i73->h3 i74->h3 i75->h3 i76->h3 i77->h3 i78->h3 i79->h3
## 0.00 -0.04 0.04 -0.04 -0.05 0.00 0.04 -0.01
## i80->h3 i81->h3 i82->h3 i83->h3 i84->h3 i85->h3 i86->h3 i87->h3
## -0.01 0.00 0.00 0.00 -0.01 0.00 0.00 0.04
## i88->h3 i89->h3 i90->h3 i91->h3 i92->h3 i93->h3 i94->h3 i95->h3
## -0.01 0.01 0.06 -0.01 -0.02 -0.06 -0.02 -0.01
## i96->h3 i97->h3 i98->h3 i99->h3 i100->h3 i101->h3 i102->h3 i103->h3
## 0.00 -0.04 0.06 0.01 -0.05 0.00 0.01 -0.03
## i104->h3 i105->h3 i106->h3 i107->h3 i108->h3
## 0.02 -0.02 0.14 0.06 0.00
## b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4
## 0.06 0.53 0.04 0.05 0.20 0.00 -0.27 0.06
## i8->h4 i9->h4 i10->h4 i11->h4 i12->h4 i13->h4 i14->h4 i15->h4
## -0.01 -0.01 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h4 i17->h4 i18->h4 i19->h4 i20->h4 i21->h4 i22->h4 i23->h4
## 0.00 0.00 0.00 0.00 0.14 0.00 0.00 -0.05
## i24->h4 i25->h4 i26->h4 i27->h4 i28->h4 i29->h4 i30->h4 i31->h4
## 0.00 0.00 0.00 -0.03 0.80 -0.09 0.00 0.27
## i32->h4 i33->h4 i34->h4 i35->h4 i36->h4 i37->h4 i38->h4 i39->h4
## 0.05 -0.09 -0.05 -0.02 0.05 0.08 0.00 -0.08
## i40->h4 i41->h4 i42->h4 i43->h4 i44->h4 i45->h4 i46->h4 i47->h4
## 0.02 0.02 0.05 0.10 -0.08 0.00 -0.07 0.00
## i48->h4 i49->h4 i50->h4 i51->h4 i52->h4 i53->h4 i54->h4 i55->h4
## -0.07 0.02 0.01 0.22 -0.15 0.00 -0.01 -0.05
## i56->h4 i57->h4 i58->h4 i59->h4 i60->h4 i61->h4 i62->h4 i63->h4
## 0.04 0.04 0.01 0.01 0.00 -0.01 0.00 0.06
## i64->h4 i65->h4 i66->h4 i67->h4 i68->h4 i69->h4 i70->h4 i71->h4
## 0.00 -0.01 -0.41 -0.03 0.01 -0.03 -0.01 -0.01
## i72->h4 i73->h4 i74->h4 i75->h4 i76->h4 i77->h4 i78->h4 i79->h4
## 0.00 0.01 0.01 0.00 -0.01 0.00 -0.08 0.00
## i80->h4 i81->h4 i82->h4 i83->h4 i84->h4 i85->h4 i86->h4 i87->h4
## -0.01 0.00 0.00 0.00 0.01 0.00 0.01 0.00
## i88->h4 i89->h4 i90->h4 i91->h4 i92->h4 i93->h4 i94->h4 i95->h4
## 0.01 0.01 -0.05 -0.01 0.00 0.00 0.00 0.00
## i96->h4 i97->h4 i98->h4 i99->h4 i100->h4 i101->h4 i102->h4 i103->h4
## 0.00 0.00 0.00 -0.03 0.00 0.00 0.00 0.02
## i104->h4 i105->h4 i106->h4 i107->h4 i108->h4
## 0.00 0.00 0.25 -0.01 0.01
## b->h5 i1->h5 i2->h5 i3->h5 i4->h5 i5->h5 i6->h5 i7->h5
## 0.17 -0.04 0.23 -0.79 0.00 0.11 -0.11 -0.24
## i8->h5 i9->h5 i10->h5 i11->h5 i12->h5 i13->h5 i14->h5 i15->h5
## 0.52 0.32 0.14 0.00 0.00 0.00 0.00 0.00
## i16->h5 i17->h5 i18->h5 i19->h5 i20->h5 i21->h5 i22->h5 i23->h5
## 0.00 0.00 0.00 0.00 -0.72 0.00 0.00 0.71
## i24->h5 i25->h5 i26->h5 i27->h5 i28->h5 i29->h5 i30->h5 i31->h5
## 0.00 0.00 0.00 0.18 0.31 0.36 -1.14 -0.91
## i32->h5 i33->h5 i34->h5 i35->h5 i36->h5 i37->h5 i38->h5 i39->h5
## 0.46 0.82 0.50 0.07 0.34 -0.09 0.14 0.07
## i40->h5 i41->h5 i42->h5 i43->h5 i44->h5 i45->h5 i46->h5 i47->h5
## -0.63 0.62 0.52 0.16 0.67 0.39 -0.87 -0.48
## i48->h5 i49->h5 i50->h5 i51->h5 i52->h5 i53->h5 i54->h5 i55->h5
## -0.26 -0.81 0.41 -0.21 -0.04 0.38 0.79 0.16
## i56->h5 i57->h5 i58->h5 i59->h5 i60->h5 i61->h5 i62->h5 i63->h5
## -0.91 0.50 -0.39 -0.30 0.90 -0.53 0.11 0.06
## i64->h5 i65->h5 i66->h5 i67->h5 i68->h5 i69->h5 i70->h5 i71->h5
## 0.00 0.00 -0.02 -0.15 0.33 -0.13 -0.50 0.54
## i72->h5 i73->h5 i74->h5 i75->h5 i76->h5 i77->h5 i78->h5 i79->h5
## -0.54 0.69 -0.30 0.83 0.11 0.02 -0.55 0.26
## i80->h5 i81->h5 i82->h5 i83->h5 i84->h5 i85->h5 i86->h5 i87->h5
## -0.53 0.05 0.00 0.07 0.49 0.09 0.28 -0.34
## i88->h5 i89->h5 i90->h5 i91->h5 i92->h5 i93->h5 i94->h5 i95->h5
## -0.33 0.38 -0.92 0.68 0.35 0.06 0.31 0.08
## i96->h5 i97->h5 i98->h5 i99->h5 i100->h5 i101->h5 i102->h5 i103->h5
## 0.40 -0.93 0.04 -0.21 0.51 0.45 -0.07 0.19
## i104->h5 i105->h5 i106->h5 i107->h5 i108->h5
## -0.19 0.34 -0.45 -0.28 -0.95
## b->h6 i1->h6 i2->h6 i3->h6 i4->h6 i5->h6 i6->h6 i7->h6
## -0.02 -0.06 0.35 -0.90 -0.02 0.00 -0.45 0.25
## i8->h6 i9->h6 i10->h6 i11->h6 i12->h6 i13->h6 i14->h6 i15->h6
## 1.08 -0.33 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h6 i17->h6 i18->h6 i19->h6 i20->h6 i21->h6 i22->h6 i23->h6
## 0.00 0.00 0.00 0.00 -0.22 0.00 0.00 -0.21
## i24->h6 i25->h6 i26->h6 i27->h6 i28->h6 i29->h6 i30->h6 i31->h6
## 0.00 0.00 0.00 0.41 -0.21 -0.54 -0.30 -0.13
## i32->h6 i33->h6 i34->h6 i35->h6 i36->h6 i37->h6 i38->h6 i39->h6
## -0.25 0.36 0.78 0.08 0.35 -0.97 0.02 0.45
## i40->h6 i41->h6 i42->h6 i43->h6 i44->h6 i45->h6 i46->h6 i47->h6
## -0.03 0.31 0.23 -0.07 -0.19 -0.07 0.00 -0.38
## i48->h6 i49->h6 i50->h6 i51->h6 i52->h6 i53->h6 i54->h6 i55->h6
## -0.01 -0.30 0.64 -0.47 -0.15 0.80 0.39 0.29
## i56->h6 i57->h6 i58->h6 i59->h6 i60->h6 i61->h6 i62->h6 i63->h6
## -0.87 -0.17 0.00 0.57 0.86 -1.28 -0.62 0.60
## i64->h6 i65->h6 i66->h6 i67->h6 i68->h6 i69->h6 i70->h6 i71->h6
## 0.00 0.00 -0.02 -0.31 0.00 -0.20 -0.09 -0.01
## i72->h6 i73->h6 i74->h6 i75->h6 i76->h6 i77->h6 i78->h6 i79->h6
## 0.31 0.02 0.93 -0.09 -0.41 0.00 0.00 -0.04
## i80->h6 i81->h6 i82->h6 i83->h6 i84->h6 i85->h6 i86->h6 i87->h6
## -0.01 0.00 0.00 0.00 0.00 0.00 -0.01 -0.01
## i88->h6 i89->h6 i90->h6 i91->h6 i92->h6 i93->h6 i94->h6 i95->h6
## -0.08 0.04 0.00 0.01 -0.03 0.16 0.01 -0.01
## i96->h6 i97->h6 i98->h6 i99->h6 i100->h6 i101->h6 i102->h6 i103->h6
## 0.01 0.10 -0.11 0.00 0.02 0.01 -0.02 -0.03
## i104->h6 i105->h6 i106->h6 i107->h6 i108->h6
## 0.00 0.00 -0.20 0.00 0.00
## b->h7 i1->h7 i2->h7 i3->h7 i4->h7 i5->h7 i6->h7 i7->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i8->h7 i9->h7 i10->h7 i11->h7 i12->h7 i13->h7 i14->h7 i15->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i16->h7 i17->h7 i18->h7 i19->h7 i20->h7 i21->h7 i22->h7 i23->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i24->h7 i25->h7 i26->h7 i27->h7 i28->h7 i29->h7 i30->h7 i31->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i32->h7 i33->h7 i34->h7 i35->h7 i36->h7 i37->h7 i38->h7 i39->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i40->h7 i41->h7 i42->h7 i43->h7 i44->h7 i45->h7 i46->h7 i47->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i48->h7 i49->h7 i50->h7 i51->h7 i52->h7 i53->h7 i54->h7 i55->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i56->h7 i57->h7 i58->h7 i59->h7 i60->h7 i61->h7 i62->h7 i63->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i64->h7 i65->h7 i66->h7 i67->h7 i68->h7 i69->h7 i70->h7 i71->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i72->h7 i73->h7 i74->h7 i75->h7 i76->h7 i77->h7 i78->h7 i79->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i80->h7 i81->h7 i82->h7 i83->h7 i84->h7 i85->h7 i86->h7 i87->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i88->h7 i89->h7 i90->h7 i91->h7 i92->h7 i93->h7 i94->h7 i95->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i96->h7 i97->h7 i98->h7 i99->h7 i100->h7 i101->h7 i102->h7 i103->h7
## 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00
## i104->h7 i105->h7 i106->h7 i107->h7 i108->h7
## 0.00 0.00 0.00 0.00 0.00
## b->o h1->o h2->o h3->o h4->o h5->o h6->o h7->o
## 1.10 -1.53 -2.45 0.30 -0.17 -5.28 4.56 -0.21
# Variable-importance plot for the TDA-assisted neural network
# `Adult_TDA_KDE_5.40.5_n5_NN1Fit0`, limited to 50 features.
# The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_KDE_5.40.5_n5_NN1Fit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n5_NN1Fit TDA-Assisted NN")

# Score the test data with the TDA-assisted network fitted above.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_NN1Fit0,
  newdata = adult.one_hot_df4Test
)
# Cross-tabulate predicted vs. observed class labels on the test data.
ad_tda_kde_5.40.5_n5_nn1_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6678 1518
## >50K 738 834
##
## Accuracy : 0.769
## 95% CI : (0.7606, 0.7774)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.01165
##
## Kappa : 0.2876
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.9005
## Specificity : 0.3546
## Pos Pred Value : 0.8148
## Neg Pred Value : 0.5305
## Prevalence : 0.7592
## Detection Rate : 0.6837
## Detection Prevalence : 0.8391
## Balanced Accuracy : 0.6275
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n5_nn1_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6678 1518
## >50K 738 834
##
## Accuracy : 0.769
## 95% CI : (0.7606, 0.7774)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.01165
##
## Kappa : 0.2876
##
## Mcnemar's Test P-Value : < 2e-16
##
## Sensitivity : 0.9005
## Specificity : 0.3546
## Pos Pred Value : 0.8148
## Neg Pred Value : 0.5305
## Prevalence : 0.7592
## Detection Rate : 0.6837
## Detection Prevalence : 0.8391
## Balanced Accuracy : 0.6275
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n5_nn1_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.690418e-01 2.876467e-01 7.605534e-01 7.773695e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.164745e-02 1.883989e-60
# Keep the overall test-set accuracy (the first element of `$overall`) for the
# test-set accuracy difference computed further down.
ad_tda_kde_5.40.5_n5_nn1_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_nn1_cf0$overall["Accuracy"]
# Show the per-class statistics of the same confusion matrix.
ad_tda_kde_5.40.5_n5_nn1_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9004854 0.3545918 0.8147877
## Neg Pred Value Precision Recall
## 0.5305344 0.8147877 0.9004854
## F1 Prevalence Detection Rate
## 0.8554958 0.7592138 0.6836609
## Detection Prevalence Balanced Accuracy
## 0.8390663 0.6275386
# Keep Precision, Recall and F1 (elements 5 to 7 of `$byClass`).
ad_tda_kde_5.40.5_n5_nn1_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_nn1_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NN vs. tda-assisted NN classifiers
### 3-fold diff
diff_tda_kde_5.40.5_nn1_n5_3_fold<-(ad_nn1_fit_re - ad_tda_kde_5.40.5_n5_nn1_fit_re)
diff_tda_kde_5.40.5_nn1_n5_3_fold
## Accuracy
## 1 -0.01062963
## 2 -0.07354400
## 3 -0.06660064
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_nn1.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_nn1.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_kde_5.40.5_nn1.n5_3_fold_odds.left<-bst_tda_kde_5.40.5_nn1.n5_3_fold$probLeft/bst_tda_kde_5.40.5_nn1.n5_3_fold$probRight
bst_tda_kde_5.40.5_nn1.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_nn1.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_nn1.n5_3_fold
## $winLeft
## [1] 0.9636667
##
## $winRope
## [1] 0.03633333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_kde_5.40.5_nn1.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nn1.n5_3_fold
## $left
## [1] 0.8889447
##
## $rope
## [1] 0.05106214
##
## $right
## [1] 0.05999317
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_nn1_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nn1.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold))
#bf_tda_kde_5.40.5_nn1.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nn1_n5_3_fold)
## t = -2.5236, df = 2, p-value = 0.1276
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.13594694 0.03543076
## sample estimates:
## mean of x
## -0.05025809
### Test set diff
diff_tda_kde_5.40.5_nn1.n5_test<-(nn1_cf_ov_acc - ad_tda_kde_5.40.5_n5_nn1_cf0_ov_acc)
diff_tda_kde_5.40.5_nn1.n5_test
## Accuracy
## -0.00982801
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_nn1.n5_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nn1.n5_test),-0.01,0.01)
bst_tda_kde_5.40.5_nn1.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test on the test-set
# difference: probLeft / probRight.
# In the recorded run both probabilities are 0, so the ratio prints as NaN.
bst_tda_kde_5.40.5_nn1.n5_test_odds.left <-
  bst_tda_kde_5.40.5_nn1.n5_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nn1.n5_test[["probRight"]]
bst_tda_kde_5.40.5_nn1.n5_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test on the n5 test-set accuracy difference, with the
# same ROPE bounds (-0.01, 0.01) as the other tests in this section.
# This step previously read `diff_tda_kde_5.40.5_nn1.n4_test` and assigned
# `bsr_tda_kde_5.40.5_nn1.n4_test` (a copy-paste slip from the n4 section), so
# no n5 result was ever produced.
# NOTE(review): the printed output recorded below this call came from the n4
# difference and is stale until the script is re-run.
bsr_tda_kde_5.40.5_nn1.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nn1.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nn1.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1578667
##
## $winRight
## [1] 0.8421333
# Bayesian correlated t-test on the single test-set accuracy difference.
# In the recorded run it returns NA for left, rope and right.
# NOTE(review): presumably because a single observation has no variance --
# confirm against the definition of correlatedBayesianTtest.
bct_tda_kde_5.40.5_nn1.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nn1.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nn1.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nn1.n5_test))
#BayesFactor
#bf_tda_kde_5.40.5_nn1.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nn1.n5_test))
#bf_tda_kde_5.40.5_nn1.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nn1.n5_test))
##Logistic Regression
# Baseline (non-TDA) logistic regression: caret `train()` with method "glm" and
# a binomial family, fitted on `adult.one_hot_df4Train` and resampled according
# to `fitControl`, with Accuracy as the summary metric.
adultLrFit <- train(
  as.factor(adult_df1) ~ .,
  data      = adult.one_hot_df4Train,
  method    = "glm",
  family    = "binomial",
  metric    = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
adultLrFit
## Generalized Linear Model
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15195, 15195, 15196
## Resampling results:
##
## Accuracy Kappa
## 0.8510946 0.5655136
adultLrFit$resample
## Accuracy Kappa Resample
## 1 0.8469334 0.5572254 Fold1
## 2 0.8558831 0.5769366 Fold2
## 3 0.8504673 0.5623789 Fold3
# Keep the per-fold accuracies (first column of `$resample`) as a one-column
# data frame for the fold-wise comparison with the TDA-assisted fits.
ad_lr_fit_re <- adultLrFit$resample["Accuracy"]
# Print the model summary, including the coefficient table.
summary(adultLrFit)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.023e+13 6.477e+12 -1.580 0.114062
## V1 2.877e-02 1.982e-03 14.517 < 2e-16 ***
## V2.. 1.023e+13 6.477e+12 1.580 0.114062
## V2.Federal.gov 1.023e+13 6.477e+12 1.580 0.114062
## V2.Local.gov 1.023e+13 6.477e+12 1.580 0.114062
## V2.Never.worked -4.493e+15 6.477e+12 -693.776 < 2e-16 ***
## V2.Private 1.023e+13 6.477e+12 1.580 0.114062
## V2.Self.emp.inc 1.023e+13 6.477e+12 1.580 0.114062
## V2.Self.emp.not.inc 1.023e+13 6.477e+12 1.580 0.114062
## V2.State.gov 1.023e+13 6.477e+12 1.580 0.114062
## V2.Without.pay 1.023e+13 6.477e+12 1.580 0.114062
## V3 6.829e-07 2.062e-07 3.311 0.000929 ***
## V4.10th -1.171e+00 1.825e-01 -6.415 1.41e-10 ***
## V4.11th -1.000e+00 1.819e-01 -5.498 3.83e-08 ***
## V4.12th -7.853e-01 2.752e-01 -2.853 0.004333 **
## V4.1st.4th -1.871e+00 6.099e-01 -3.067 0.002162 **
## V4.5th.6th -1.244e+00 3.324e-01 -3.743 0.000182 ***
## V4.7th.8th -1.593e+00 2.140e-01 -7.447 9.58e-14 ***
## V4.9th -1.634e+00 2.940e-01 -5.558 2.73e-08 ***
## V4.Assoc.acdm 2.427e-01 1.188e-01 2.043 0.041024 *
## V4.Assoc.voc 2.679e-01 1.016e-01 2.637 0.008373 **
## V4.Bachelors 7.912e-01 6.671e-02 11.861 < 2e-16 ***
## V4.Doctorate 2.068e+00 1.960e-01 10.554 < 2e-16 ***
## V4.HS.grad -3.188e-01 6.025e-02 -5.291 1.21e-07 ***
## V4.Masters 1.203e+00 9.717e-02 12.381 < 2e-16 ***
## V4.Preschool -2.478e+01 4.565e+04 -0.001 0.999567
## V4.Prof.school 1.768e+00 1.640e-01 10.778 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.067e-01 1.836e-01 -1.126 0.260179
## V6.Married.AF.spouse 2.268e+00 6.704e-01 3.383 0.000716 ***
## V6.Married.civ.spouse 2.016e+00 3.637e-01 5.543 2.97e-08 ***
## V6.Married.spouse.absent -2.659e-01 3.270e-01 -0.813 0.416064
## V6.Never.married -5.888e-01 1.902e-01 -3.096 0.001963 **
## V6.Separated -9.858e-02 2.441e-01 -0.404 0.686323
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.023e-01 1.192e-01 1.697 0.089726 .
## V7.Armed.Forces -7.939e-01 1.624e+00 -0.489 0.624962
## V7.Craft.repair 2.979e-01 1.019e-01 2.923 0.003466 **
## V7.Exec.managerial 1.007e+00 1.044e-01 9.650 < 2e-16 ***
## V7.Farming.fishing -8.628e-01 1.757e-01 -4.910 9.12e-07 ***
## V7.Handlers.cleaners -5.935e-01 1.771e-01 -3.351 0.000806 ***
## V7.Machine.op.inspct -2.407e-02 1.274e-01 -0.189 0.850211
## V7.Other.service -6.090e-01 1.490e-01 -4.087 4.38e-05 ***
## V7.Priv.house.serv -3.405e+00 1.946e+00 -1.750 0.080130 .
## V7.Prof.specialty 6.747e-01 1.122e-01 6.012 1.83e-09 ***
## V7.Protective.serv 7.220e-01 1.550e-01 4.658 3.20e-06 ***
## V7.Sales 4.921e-01 1.077e-01 4.570 4.88e-06 ***
## V7.Tech.support 9.149e-01 1.416e-01 6.461 1.04e-10 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.421e+00 1.220e-01 -11.642 < 2e-16 ***
## V8.Not.in.family -9.011e-01 3.368e-01 -2.675 0.007467 **
## V8.Other.relative -1.859e+00 3.008e-01 -6.181 6.38e-10 ***
## V8.Own.child -2.126e+00 3.330e-01 -6.383 1.74e-10 ***
## V8.Unmarried -1.056e+00 3.482e-01 -3.033 0.002418 **
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -7.414e-01 2.697e-01 -2.749 0.005977 **
## V9.Asian.Pac.Islander 4.763e-02 1.879e-01 0.253 0.799923
## V9.Black -1.683e-01 9.175e-02 -1.835 0.066569 .
## V9.Other -3.401e-01 3.230e-01 -1.053 0.292291
## V9.White NA NA NA NA
## V10.Female -8.625e-01 9.424e-02 -9.152 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 3.164e-04 1.239e-05 25.527 < 2e-16 ***
## V12 6.201e-04 4.394e-05 14.113 < 2e-16 ***
## V13 2.963e-02 1.935e-03 15.312 < 2e-16 ***
## V14.. -4.819e-01 7.233e-01 -0.666 0.505218
## V14.Cambodia 1.147e+00 1.099e+00 1.043 0.296775
## V14.Canada -1.529e-01 7.754e-01 -0.197 0.843649
## V14.China -8.039e-01 8.419e-01 -0.955 0.339658
## V14.Columbia -1.342e+00 1.150e+00 -1.168 0.242916
## V14.Cuba 2.932e-01 7.928e-01 0.370 0.711537
## V14.Dominican.Republic -1.810e+00 1.268e+00 -1.427 0.153437
## V14.Ecuador -2.579e-01 1.069e+00 -0.241 0.809409
## V14.El.Salvador -8.031e-01 9.135e-01 -0.879 0.379330
## V14.England 1.845e-01 7.991e-01 0.231 0.817367
## V14.France 2.495e-01 1.007e+00 0.248 0.804303
## V14.Germany 1.113e-01 7.687e-01 0.145 0.884870
## V14.Greece -1.073e+00 9.239e-01 -1.161 0.245646
## V14.Guatemala 2.472e-01 1.066e+00 0.232 0.816614
## V14.Haiti -1.946e+00 1.419e+00 -1.371 0.170353
## V14.Holand.Netherlands -2.264e+01 3.459e+05 0.000 0.999948
## V14.Honduras -1.543e+00 2.583e+00 -0.597 0.550296
## V14.Hong 3.604e-02 1.018e+00 0.035 0.971757
## V14.Hungary -4.154e-01 1.208e+00 -0.344 0.731004
## V14.India -7.107e-01 8.101e-01 -0.877 0.380320
## V14.Iran -1.027e-01 8.592e-01 -0.120 0.904856
## V14.Ireland 4.688e-01 1.051e+00 0.446 0.655675
## V14.Italy 3.216e-01 8.264e-01 0.389 0.697120
## V14.Jamaica -1.134e+00 9.678e-01 -1.172 0.241185
## V14.Japan 3.956e-01 8.559e-01 0.462 0.643909
## V14.Laos -1.050e+00 1.320e+00 -0.795 0.426381
## V14.Mexico -6.909e-01 7.463e-01 -0.926 0.354542
## V14.Nicaragua -2.445e+01 5.937e+04 0.000 0.999671
## V14.Outlying.US.Guam.USVI.etc. -2.389e+01 9.718e+04 0.000 0.999804
## V14.Peru -1.208e+00 1.440e+00 -0.839 0.401520
## V14.Philippines 1.230e-02 7.716e-01 0.016 0.987281
## V14.Poland -4.460e-01 8.546e-01 -0.522 0.601727
## V14.Portugal -2.091e-01 1.041e+00 -0.201 0.840857
## V14.Puerto.Rico -1.305e+00 8.687e-01 -1.502 0.133081
## V14.Scotland -1.326e-01 1.076e+00 -0.123 0.901896
## V14.South -1.915e+00 9.039e-01 -2.118 0.034147 *
## V14.Taiwan -4.376e-01 9.296e-01 -0.471 0.637811
## V14.Thailand -3.764e-01 1.237e+00 -0.304 0.761007
## V14.Trinadad.Tobago -4.231e-01 1.156e+00 -0.366 0.714328
## V14.United.States -1.082e-01 7.053e-01 -0.153 0.878016
## V14.Vietnam -1.432e+00 9.958e-01 -1.438 0.150307
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 25165 on 22792 degrees of freedom
## Residual deviance: 14343 on 22693 degrees of freedom
## AIC: 14543
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot (25 features) for the baseline logistic regression.
vip(adultLrFit, num_features = 25) +
  ggtitle("non-TDA-Assisted LR")

# Score the test data with the baseline logistic regression.
predictions <- predict(
  adultLrFit,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate predicted vs. observed class labels on the test data.
lr_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
lr_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6923 940
## >50K 493 1412
##
## Accuracy : 0.8533
## 95% CI : (0.8461, 0.8603)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5709
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9335
## Specificity : 0.6003
## Pos Pred Value : 0.8805
## Neg Pred Value : 0.7412
## Prevalence : 0.7592
## Detection Rate : 0.7087
## Detection Prevalence : 0.8050
## Balanced Accuracy : 0.7669
##
## 'Positive' Class : <=50K
##
lr_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.532965e-01 5.709073e-01 8.461236e-01 8.602580e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.606349e-117 4.844546e-32
# Keep the overall test-set accuracy (the first element of `$overall`).
lr_cf_ov_acc <- lr_cf$overall["Accuracy"]
# Show the per-class statistics of the same confusion matrix.
lr_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9335221 0.6003401 0.8804528
## Neg Pred Value Precision Recall
## 0.7412073 0.8804528 0.9335221
## F1 Prevalence Detection Rate
## 0.9062111 0.7592138 0.7087428
## Detection Prevalence Balanced Accuracy
## 0.8049754 0.7669311
# Keep Precision, Recall and F1 (elements 5 to 7 of `$byClass`).
lr_cf_pre_rec_f1 <- lr_cf$byClass[c("Precision", "Recall", "F1")]
##With TDA PCA filter 5 intervals, 50% overlap, 5 bins
##Node1
# Node-1 TDA-assisted logistic regression: caret `train()` with method "glm"
# and a binomial family, fitted on `tda.m_adult_5.40.5.n1.vec` and resampled
# according to `fitControl`.
# A direct `glm()` fit used to be assigned to this same name immediately before
# this call. It was overwritten here without ever being read, so that redundant
# fit (and its two warnings) has been dropped; the Node2 section has no such
# preliminary fit either.
Adult_TDA_PC_5.40.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n1.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_PC_5.40.5_n1_LrFit0
## Generalized Linear Model
##
## 3373 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 2248, 2249, 2249
## Resampling results:
##
## Accuracy Kappa
## 0.967105 -0.007496659
Adult_TDA_PC_5.40.5_n1_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.9217778 -0.014572957 Fold1
## 2 0.9893238 -0.004019652 Fold2
## 3 0.9902135 -0.003897369 Fold3
# Keep the per-fold accuracies (first column of `$resample`) as a one-column
# data frame for the fold-wise comparison with the baseline fit.
ad_tda_pc_5.40.5_n1_lr_fit_re <-
  Adult_TDA_PC_5.40.5_n1_LrFit0$resample["Accuracy"]
# Print the model summary, including the coefficient table.
summary(Adult_TDA_PC_5.40.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (31 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 7.794e+15 1.126e+08 69239921 <2e-16 ***
## V1 -9.227e+12 1.202e+05 -76762987 <2e-16 ***
## V2.. 1.900e+14 1.601e+07 11869114 <2e-16 ***
## V2.Federal.gov -3.718e+14 7.285e+06 -51033622 <2e-16 ***
## V2.Local.gov -6.124e+13 6.477e+06 -9456257 <2e-16 ***
## V2.Never.worked NA NA NA NA
## V2.Private -3.302e+13 5.275e+06 -6260046 <2e-16 ***
## V2.Self.emp.inc -1.830e+14 5.981e+06 -30594761 <2e-16 ***
## V2.Self.emp.not.inc -3.051e+14 6.090e+06 -50107245 <2e-16 ***
## V2.State.gov NA NA NA NA
## V2.Without.pay NA NA NA NA
## V3 7.779e+05 1.216e+01 63957 <2e-16 ***
## V4.10th -3.505e+15 4.050e+07 -86531925 <2e-16 ***
## V4.11th 3.795e+14 3.399e+07 11165219 <2e-16 ***
## V4.12th 1.604e+14 4.778e+07 3357530 <2e-16 ***
## V4.1st.4th 3.935e+15 7.043e+07 55872561 <2e-16 ***
## V4.5th.6th 2.675e+14 6.741e+07 3968388 <2e-16 ***
## V4.7th.8th -2.890e+15 2.205e+07 -131054997 <2e-16 ***
## V4.9th 2.139e+14 6.740e+07 3173565 <2e-16 ***
## V4.Assoc.acdm -2.174e+14 8.499e+06 -25579171 <2e-16 ***
## V4.Assoc.voc -1.686e+14 7.782e+06 -21669281 <2e-16 ***
## V4.Bachelors -2.940e+14 4.808e+06 -61144356 <2e-16 ***
## V4.Doctorate -5.592e+14 6.678e+06 -83742625 <2e-16 ***
## V4.HS.grad -1.217e+13 5.804e+06 -2096845 <2e-16 ***
## V4.Masters -2.672e+14 5.283e+06 -50580689 <2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school -4.472e+14 6.126e+06 -73005397 <2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -3.346e+15 7.779e+07 -43005463 <2e-16 ***
## V6.Married.AF.spouse -7.656e+15 1.287e+08 -59483241 <2e-16 ***
## V6.Married.civ.spouse -7.937e+15 1.099e+08 -72246183 <2e-16 ***
## V6.Married.spouse.absent NA NA NA NA
## V6.Never.married NA NA NA NA
## V6.Separated NA NA NA NA
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.775e+14 1.490e+07 18622394 <2e-16 ***
## V7.Armed.Forces 4.830e+14 6.801e+07 7102466 <2e-16 ***
## V7.Craft.repair 1.394e+14 9.570e+06 14563871 <2e-16 ***
## V7.Exec.managerial 1.402e+14 8.628e+06 16251575 <2e-16 ***
## V7.Farming.fishing 1.033e+13 1.143e+07 904262 <2e-16 ***
## V7.Handlers.cleaners 1.423e+15 2.971e+07 47892601 <2e-16 ***
## V7.Machine.op.inspct -1.784e+14 1.854e+07 -9619362 <2e-16 ***
## V7.Other.service -2.821e+14 4.083e+07 -6908544 <2e-16 ***
## V7.Priv.house.serv NA NA NA NA
## V7.Prof.specialty 1.782e+14 8.885e+06 20054412 <2e-16 ***
## V7.Protective.serv 1.130e+14 1.145e+07 9864136 <2e-16 ***
## V7.Sales 2.030e+14 9.085e+06 22338096 <2e-16 ***
## V7.Tech.support -6.755e+14 1.261e+07 -53579240 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband 3.353e+15 3.040e+07 110266952 <2e-16 ***
## V8.Not.in.family -3.937e+15 8.131e+07 -48422559 <2e-16 ***
## V8.Other.relative NA NA NA NA
## V8.Own.child NA NA NA NA
## V8.Unmarried NA NA NA NA
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -3.229e+15 3.367e+07 -95892048 <2e-16 ***
## V9.Asian.Pac.Islander -8.611e+14 1.140e+07 -75562321 <2e-16 ***
## V9.Black 2.648e+13 1.093e+07 2421904 <2e-16 ***
## V9.Other -1.473e+15 2.856e+07 -51592105 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female NA NA NA NA
## V10.Male NA NA NA NA
## V11 1.131e+09 6.370e+01 17761773 <2e-16 ***
## V12 9.589e+09 1.708e+03 5614454 <2e-16 ***
## V13 -6.129e+12 1.068e+05 -57403398 <2e-16 ***
## V14.. 1.077e+15 3.480e+07 30942881 <2e-16 ***
## V14.Cambodia NA NA NA NA
## V14.Canada 1.643e+15 3.705e+07 44334024 <2e-16 ***
## V14.China 2.799e+15 4.063e+07 68886260 <2e-16 ***
## V14.Columbia 2.572e+15 7.550e+07 34070629 <2e-16 ***
## V14.Cuba 2.279e+15 4.070e+07 55992207 <2e-16 ***
## V14.Dominican.Republic NA NA NA NA
## V14.Ecuador 2.214e+15 5.850e+07 37848677 <2e-16 ***
## V14.El.Salvador 2.291e+15 4.539e+07 50479266 <2e-16 ***
## V14.England 1.332e+15 3.815e+07 34907181 <2e-16 ***
## V14.France -8.320e+14 4.361e+07 -19078414 <2e-16 ***
## V14.Germany 2.322e+15 3.748e+07 61964232 <2e-16 ***
## V14.Greece 2.759e+15 4.388e+07 62885410 <2e-16 ***
## V14.Guatemala NA NA NA NA
## V14.Haiti NA NA NA NA
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras NA NA NA NA
## V14.Hong 3.116e+15 5.960e+07 52279623 <2e-16 ***
## V14.Hungary 2.274e+15 5.867e+07 38752454 <2e-16 ***
## V14.India 2.318e+15 3.763e+07 61587443 <2e-16 ***
## V14.Iran 7.275e+14 4.039e+07 18010602 <2e-16 ***
## V14.Ireland 2.248e+15 5.845e+07 38459941 <2e-16 ***
## V14.Italy -1.585e+15 3.971e+07 -39911065 <2e-16 ***
## V14.Jamaica 2.511e+15 7.623e+07 32936114 <2e-16 ***
## V14.Japan -5.501e+14 4.007e+07 -13727127 <2e-16 ***
## V14.Laos NA NA NA NA
## V14.Mexico 1.156e+15 4.152e+07 27838314 <2e-16 ***
## V14.Nicaragua NA NA NA NA
## V14.Outlying.US.Guam.USVI.etc. NA NA NA NA
## V14.Peru 1.984e+15 7.533e+07 26338332 <2e-16 ***
## V14.Philippines 2.475e+15 3.974e+07 62284837 <2e-16 ***
## V14.Poland -7.120e+14 5.181e+07 -13743542 <2e-16 ***
## V14.Portugal 2.554e+15 7.551e+07 33828344 <2e-16 ***
## V14.Puerto.Rico 4.801e+14 4.839e+07 9920182 <2e-16 ***
## V14.Scotland 2.573e+15 7.537e+07 34140859 <2e-16 ***
## V14.South 1.221e+15 4.399e+07 27749907 <2e-16 ***
## V14.Taiwan 2.812e+15 3.967e+07 70887541 <2e-16 ***
## V14.Thailand 8.287e+14 5.954e+07 13917700 <2e-16 ***
## V14.Trinadad.Tobago NA NA NA NA
## V14.United.States 2.101e+15 3.396e+07 61863098 <2e-16 ***
## V14.Vietnam 3.606e+15 7.643e+07 47179570 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 304.8 on 3372 degrees of freedom
## Residual deviance: 1874.3 on 3295 degrees of freedom
## AIC: 2030.3
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-1 TDA-assisted logistic regression,
# limited to 50 features.
# The title previously misspelled "Assisted" as "Assited".
vip(Adult_TDA_PC_5.40.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n1_Lr1Fit TDA-Assisted LR")

# Score the test data with the node-1 TDA-assisted logistic regression.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate predicted vs. observed class labels on the test data.
ad_tda_pc_5.40.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 98 20
## >50K 7318 2332
##
## Accuracy : 0.2488
## 95% CI : (0.2402, 0.2575)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0023
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.01321
## Specificity : 0.99150
## Pos Pred Value : 0.83051
## Neg Pred Value : 0.24166
## Prevalence : 0.75921
## Detection Rate : 0.01003
## Detection Prevalence : 0.01208
## Balanced Accuracy : 0.50236
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 98 20
## >50K 7318 2332
##
## Accuracy : 0.2488
## 95% CI : (0.2402, 0.2575)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.0023
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.01321
## Specificity : 0.99150
## Pos Pred Value : 0.83051
## Neg Pred Value : 0.24166
## Prevalence : 0.75921
## Detection Rate : 0.01003
## Detection Prevalence : 0.01208
## Balanced Accuracy : 0.50236
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.248771499 0.002287689 0.240223237 0.257469794 0.759213759
## AccuracyPValue McnemarPValue
## 1.000000000 0.000000000
# Keep the overall test-set accuracy (the first element of `$overall`) for the
# test-set accuracy difference computed further down.
ad_tda_pc_5.40.5_n1_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n1_lr_cf0$overall["Accuracy"]
# Show the per-class statistics of the same confusion matrix.
ad_tda_pc_5.40.5_n1_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.01321467 0.99149660 0.83050847
## Neg Pred Value Precision Recall
## 0.24165803 0.83050847 0.01321467
## F1 Prevalence Detection Rate
## 0.02601540 0.75921376 0.01003276
## Detection Prevalence Balanced Accuracy
## 0.01208026 0.50235563
# Keep Precision, Recall and F1 (elements 5 to 7 of `$byClass`).
ad_tda_pc_5.40.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
diff_tda_pca_5.40.5_lr_n1_3_fold<-(ad_lr_fit_re - ad_tda_pc_5.40.5_n1_lr_fit_re)
diff_tda_pca_5.40.5_lr_n1_3_fold
## Accuracy
## 1 -0.07484437
## 2 -0.13344072
## 3 -0.13974623
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.40.5_lr.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold),-0.01,0.01)
bst_tda_pca_5.40.5_lr.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.40.5_lr.n1_3_fold_odds.left<-bst_tda_pca_5.40.5_lr.n1_3_fold$probLeft/bst_tda_pca_5.40.5_lr.n1_3_fold$probRight
bst_tda_pca_5.40.5_lr.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_lr.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold),-0.01,0.01)
bsr_tda_pca_5.40.5_lr.n1_3_fold
## $winLeft
## [1] 0.9915
##
## $winRope
## [1] 0.0085
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.40.5_lr.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_lr.n1_3_fold
## $left
## [1] 0.9764466
##
## $rope
## [1] 0.006536675
##
## $right
## [1] 0.01701672
# Rope Plot
plot(rope(diff_tda_pca_5.40.5_lr_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold))
#bf_tda_pca_5.40.5_lr.n1_3_fold
# t_test: one-sample t-test of the per-fold differences (the recorded output
# tests against a true mean of 0).
t.test(
  as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_lr_n1_3_fold)
## t = -5.6143, df = 2, p-value = 0.03029
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.20491772 -0.02710317
## sample estimates:
## mean of x
## -0.1160104
### Test set diff
# Single test-set accuracy difference: lr_cf_ov_acc minus the node-1
# TDA-assisted accuracy (positive means the first operand scored higher).
diff_tda_pca_5.40.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_pc_5.40.5_n1_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n1_test
## Accuracy
## 0.604525
## Bayesian Tests Test set diff
# Bayesian Sign Test on the test-set difference (a single value here), with
# the bounds -0.01 and 0.01.
bst_tda_pca_5.40.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test (test set): probLeft divided by probRight.
bst_tda_pca_5.40.5_lr.n1_test_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the test-set difference, with the bounds
# -0.01 and 0.01.
bsr_tda_pca_5.40.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1634
##
## $winRight
## [1] 0.8366
# Bayesian Correlated Test on the test-set difference.
# NOTE(review): the recorded result is NA for left / rope / right, presumably
# because a single difference has no sample variance — this test needs
# several paired differences to be informative.
bct_tda_pca_5.40.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n1_test))
#BayesFactor
#bf_tda_pca_5.40.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n1_test))
#bf_tda_pca_5.40.5_lr.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n1_test))
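##With TDA PCA filter 5 intervals, 50% overlap, 5 bins (NOTE(review): the object names in this section use "5.40.5", which suggests 40% overlap - confirm)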
##Node2
# Fit the TDA-assisted logistic regression for node 2: a binomial GLM trained
# via train() on the node-2 data, resampled according to `fitControl`
# (3-fold CV in the recorded run) and scored on accuracy.
# NOTE(review): the recorded run warns about non-convergence and a
# rank-deficient fit, and the summary reports coefficients dropped for
# singularities — the one-hot columns look collinear; consider dropping one
# reference level per factor.
Adult_TDA_PC_5.40.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted node-2 model: sample/predictor counts, resampling scheme
# and the resampled Accuracy / Kappa.
Adult_TDA_PC_5.40.5_n2_LrFit0
## Generalized Linear Model
##
## 10276 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6850, 6851, 6851
## Resampling results:
##
## Accuracy Kappa
## 0.6709772 0.2382677
# Per-fold resampling results for the node-2 model.
Adult_TDA_PC_5.40.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.7092820 0.373101325 Fold1
## 2 0.6072993 -0.004327221 Fold2
## 3 0.6963504 0.346028916 Fold3
# Keep only the per-fold Accuracy column (a one-column data frame) for the
# paired 3-fold comparison with the baseline LR.
ad_tda_pc_5.40.5_n2_lr_fit_re <-
  Adult_TDA_PC_5.40.5_n2_LrFit0$resample["Accuracy"]
# Coefficient table and fit diagnostics of the underlying glm.
summary(Adult_TDA_PC_5.40.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (12 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.407e+13 2.407e+13 -1.831 0.067089 .
## V1 -6.101e-04 2.228e-03 -0.274 0.784239
## V2.. 3.822e+13 1.810e+13 2.112 0.034710 *
## V2.Federal.gov 3.822e+13 1.810e+13 2.112 0.034709 *
## V2.Local.gov 3.822e+13 1.810e+13 2.112 0.034710 *
## V2.Never.worked NA NA NA NA
## V2.Private 3.822e+13 1.810e+13 2.112 0.034710 *
## V2.Self.emp.inc 3.822e+13 1.810e+13 2.112 0.034710 *
## V2.Self.emp.not.inc 3.822e+13 1.810e+13 2.112 0.034710 *
## V2.State.gov 3.822e+13 1.810e+13 2.112 0.034709 *
## V2.Without.pay 3.822e+13 1.810e+13 2.112 0.034709 *
## V3 1.216e-06 2.386e-07 5.097 3.45e-07 ***
## V4.10th -1.493e-01 2.146e-01 -0.696 0.486528
## V4.11th -5.168e-02 2.543e-01 -0.203 0.838952
## V4.12th 7.819e-02 3.367e-01 0.232 0.816335
## V4.1st.4th 6.884e-01 8.863e-01 0.777 0.437346
## V4.5th.6th 3.228e-01 4.838e-01 0.667 0.504607
## V4.7th.8th -6.676e-01 2.264e-01 -2.949 0.003191 **
## V4.9th -3.572e-01 3.713e-01 -0.962 0.335925
## V4.Assoc.acdm -2.324e-01 1.296e-01 -1.793 0.072994 .
## V4.Assoc.voc -2.598e-01 1.092e-01 -2.379 0.017347 *
## V4.Bachelors 2.593e-01 7.265e-02 3.568 0.000359 ***
## V4.Doctorate 9.387e-01 1.968e-01 4.769 1.85e-06 ***
## V4.HS.grad -2.324e-01 6.665e-02 -3.487 0.000489 ***
## V4.Masters 6.936e-01 1.053e-01 6.586 4.52e-11 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school 6.451e-01 1.746e-01 3.696 0.000219 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 5.857e+12 8.219e+12 0.713 0.476116
## V6.Married.AF.spouse 5.857e+12 8.219e+12 0.713 0.476116
## V6.Married.civ.spouse 5.857e+12 8.219e+12 0.713 0.476116
## V6.Married.spouse.absent 5.857e+12 8.219e+12 0.713 0.476116
## V6.Never.married 5.857e+12 8.219e+12 0.713 0.476116
## V6.Separated 5.857e+12 8.219e+12 0.713 0.476116
## V6.Widowed 5.857e+12 8.219e+12 0.713 0.476116
## V7.. NA NA NA NA
## V7.Adm.clerical 1.174e+00 1.622e-01 7.241 4.45e-13 ***
## V7.Armed.Forces 2.544e+01 4.547e+05 0.000 0.999955
## V7.Craft.repair 5.911e-02 9.923e-02 0.596 0.551370
## V7.Exec.managerial 8.265e-01 1.031e-01 8.018 1.08e-15 ***
## V7.Farming.fishing -5.710e-01 1.568e-01 -3.641 0.000272 ***
## V7.Handlers.cleaners 7.427e-01 2.249e-01 3.302 0.000959 ***
## V7.Machine.op.inspct 9.437e-01 1.499e-01 6.295 3.08e-10 ***
## V7.Other.service 6.674e-01 2.170e-01 3.075 0.002106 **
## V7.Priv.house.serv NA NA NA NA
## V7.Prof.specialty 6.226e-01 1.134e-01 5.491 4.00e-08 ***
## V7.Protective.serv 6.205e-01 1.546e-01 4.013 5.99e-05 ***
## V7.Sales 6.215e-01 1.098e-01 5.659 1.52e-08 ***
## V7.Tech.support 1.051e+00 1.603e-01 6.557 5.49e-11 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.759e+01 6.707e+03 -0.003 0.997907
## V8.Not.in.family -1.569e+01 6.707e+03 -0.002 0.998134
## V8.Other.relative -1.640e+01 6.707e+03 -0.002 0.998049
## V8.Own.child -1.621e+01 6.707e+03 -0.002 0.998072
## V8.Unmarried 3.895e+00 1.423e+04 0.000 0.999782
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 8.385e-02 3.942e-01 0.213 0.831545
## V9.Asian.Pac.Islander 3.760e-01 2.189e-01 1.718 0.085858 .
## V9.Black 1.428e+00 1.805e-01 7.911 2.55e-15 ***
## V9.Other 6.009e-01 5.236e-01 1.148 0.251056
## V9.White NA NA NA NA
## V10.Female 2.021e+01 2.705e+04 0.001 0.999404
## V10.Male NA NA NA NA
## V11 2.628e-04 1.505e-05 17.457 < 2e-16 ***
## V12 4.563e-04 4.835e-05 9.437 < 2e-16 ***
## V13 9.090e-03 2.134e-03 4.261 2.04e-05 ***
## V14.. -4.094e-01 7.547e-01 -0.542 0.587511
## V14.Cambodia 2.704e+01 1.601e+05 0.000 0.999865
## V14.Canada 1.954e-01 8.033e-01 0.243 0.807788
## V14.China -1.112e+00 8.743e-01 -1.272 0.203343
## V14.Columbia -1.936e+00 1.371e+00 -1.412 0.157969
## V14.Cuba 6.579e-01 8.684e-01 0.758 0.448730
## V14.Dominican.Republic -2.712e+01 3.323e+05 0.000 0.999935
## V14.Ecuador 5.720e-01 1.379e+00 0.415 0.678398
## V14.El.Salvador 7.624e-01 1.174e+00 0.649 0.516140
## V14.England 2.200e-01 8.443e-01 0.261 0.794425
## V14.France 3.260e-01 1.048e+00 0.311 0.755834
## V14.Germany 3.628e-01 8.106e-01 0.448 0.654406
## V14.Greece -1.226e+00 9.702e-01 -1.263 0.206489
## V14.Guatemala 2.391e+01 4.279e+05 0.000 0.999955
## V14.Haiti -1.593e+00 1.466e+00 -1.086 0.277416
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -3.123e+01 2.787e+04 -0.001 0.999106
## V14.Hong -1.171e-01 1.177e+00 -0.099 0.920777
## V14.Hungary 6.350e-03 1.234e+00 0.005 0.995894
## V14.India -1.125e+00 8.265e-01 -1.361 0.173578
## V14.Iran -3.755e-01 8.938e-01 -0.420 0.674411
## V14.Ireland 2.622e+01 1.848e+05 0.000 0.999887
## V14.Italy 1.385e-01 8.353e-01 0.166 0.868285
## V14.Jamaica -5.475e-01 1.136e+00 -0.482 0.629679
## V14.Japan -5.785e-01 8.783e-01 -0.659 0.510150
## V14.Laos 2.575e+01 4.859e+05 0.000 0.999958
## V14.Mexico 3.309e-01 8.242e-01 0.401 0.688069
## V14.Nicaragua -9.381e-02 1.611e+00 -0.058 0.953576
## V14.Outlying.US.Guam.USVI.etc. -2.516e+01 2.619e+05 0.000 0.999923
## V14.Peru 8.124e-01 1.637e+00 0.496 0.619710
## V14.Philippines 7.155e-01 8.472e-01 0.845 0.398352
## V14.Poland -1.016e-01 8.876e-01 -0.115 0.908840
## V14.Portugal -1.716e-01 1.230e+00 -0.139 0.889116
## V14.Puerto.Rico 6.469e-01 1.148e+00 0.563 0.573193
## V14.Scotland 2.685e+01 2.979e+05 0.000 0.999928
## V14.South -7.416e-01 9.084e-01 -0.816 0.414247
## V14.Taiwan -7.607e-01 9.396e-01 -0.810 0.418185
## V14.Thailand -3.482e-01 1.454e+00 -0.240 0.810683
## V14.Trinadad.Tobago 2.602e+01 2.845e+05 0.000 0.999927
## V14.United.States 1.120e-01 7.348e-01 0.152 0.878850
## V14.Vietnam -1.051e+00 1.183e+00 -0.888 0.374299
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13736 on 10275 degrees of freedom
## Residual deviance: 11002 on 10179 degrees of freedom
## AIC: 11196
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-2 model; the second argument (50) is
# presumably the number of features to display — confirm against vip().
# The plot title spells "Assisted" correctly (it previously read "Assited").
vip(Adult_TDA_PC_5.40.5_n2_LrFit0, 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n2_Lr1Fit TDA-Assisted LR")

# Score the held-out test frame with the node-2 model. `pred0` is a scratch
# name that is reassigned in each node section.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.40.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 984 360
## >50K 6432 1992
##
## Accuracy : 0.3047
## 95% CI : (0.2955, 0.3139)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0108
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1327
## Specificity : 0.8469
## Pos Pred Value : 0.7321
## Neg Pred Value : 0.2365
## Prevalence : 0.7592
## Detection Rate : 0.1007
## Detection Prevalence : 0.1376
## Balanced Accuracy : 0.4898
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; nothing modifies it
# between the two prints, so this output repeats the one shown above.
ad_tda_pc_5.40.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 984 360
## >50K 6432 1992
##
## Accuracy : 0.3047
## 95% CI : (0.2955, 0.3139)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.0108
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.1327
## Specificity : 0.8469
## Pos Pred Value : 0.7321
## Neg Pred Value : 0.2365
## Prevalence : 0.7592
## Detection Rate : 0.1007
## Detection Prevalence : 0.1376
## Balanced Accuracy : 0.4898
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-2 confusion matrix.
ad_tda_pc_5.40.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.3046683 -0.0108296 0.2955489 0.3139043 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Keep the Accuracy entry (first element above) for the test-set comparison.
ad_tda_pc_5.40.5_n2_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n2_lr_cf0$overall["Accuracy"]
# Per-class statistics of the node-2 confusion matrix.
ad_tda_pc_5.40.5_n2_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.1326861 0.8469388 0.7321429
## Neg Pred Value Precision Recall
## 0.2364672 0.7321429 0.1326861
## F1 Prevalence Detection Rate
## 0.2246575 0.7592138 0.1007371
## Detection Prevalence Balanced Accuracy
## 0.1375921 0.4898124
# Precision, Recall and F1 are entries 5-7 of byClass in the printout above;
# select them by name.
ad_tda_pc_5.40.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Per-fold accuracy difference, first operand minus TDA-assisted node-2 folds
# (ad_lr_fit_re is presumably the non-TDA baseline LR — confirm). Positive
# values mean the node-2 model scored lower on that fold.
diff_tda_pca_5.40.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.40.5_n2_lr_fit_re
diff_tda_pca_5.40.5_lr_n2_3_fold
## Accuracy
## 1 0.1376514
## 2 0.2485839
## 3 0.1541169
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the per-fold differences; the trailing -0.01 / 0.01
# are presumably the ROPE bounds (the result reports left / rope / right).
bst_tda_pca_5.40.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
bst_tda_pca_5.40.5_lr.n2_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n2_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the per-fold differences, with the bounds
# -0.01 and 0.01.
# NOTE(review): the result looks simulation-based (recorded win* values vary
# slightly between runs on identical input) — consider set.seed() beforehand.
bsr_tda_pca_5.40.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0091
##
## $winRight
## [1] 0.9909
# Bayesian Correlated Test on the per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# the usual heuristic for k-fold CV is 1/k (1/3 for the 3 folds used here) —
# confirm 0.1 is intended.
bct_tda_pca_5.40.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n2_3_fold
## $left
## [1] 0.02067505
##
## $rope
## [1] 0.004760408
##
## $right
## [1] 0.9745645
# Rope Plot: plot the rope() result for the per-fold differences, using the
# interval c(-0.01, 0.01).
plot(
  rope(
    diff_tda_pca_5.40.5_lr_n2_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.40.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold))
#bf_tda_pca_5.40.5_lr.n2_3_fold
# t_test: one-sample t-test of the per-fold differences (the recorded output
# tests against a true mean of 0).
t.test(
  as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_lr_n2_3_fold)
## t = 5.2115, df = 2, p-value = 0.0349
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.03141071 0.32882410
## sample estimates:
## mean of x
## 0.1801174
### Test set diff
# Single test-set accuracy difference: lr_cf_ov_acc minus the node-2
# TDA-assisted accuracy (positive means the first operand scored higher).
diff_tda_pca_5.40.5_lr.n2_test <-
  lr_cf_ov_acc - ad_tda_pc_5.40.5_n2_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n2_test
## Accuracy
## 0.5486282
## Bayesian Tests Test set diff
# Bayesian Sign Test on the test-set difference (a single value here), with
# the bounds -0.01 and 0.01.
bst_tda_pca_5.40.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test (test set): probLeft divided by probRight.
bst_tda_pca_5.40.5_lr.n2_test_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n2_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the test-set difference, with the bounds
# -0.01 and 0.01.
bsr_tda_pca_5.40.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1561667
##
## $winRight
## [1] 0.8438333
# Bayesian Correlated Test on the test-set difference.
# NOTE(review): the recorded result is NA for left / rope / right, presumably
# because a single difference has no sample variance — this test needs
# several paired differences to be informative.
bct_tda_pca_5.40.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n2_test))
#BayesFactor
#bf_tda_pca_5.40.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n2_test))
#bf_tda_pca_5.40.5_lr.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n2_test))
##Node3
# Fit the TDA-assisted logistic regression for node 3: a binomial GLM trained
# via train() on the node-3 data, resampled according to `fitControl`
# (3-fold CV in the recorded run) and scored on accuracy.
# NOTE(review): the recorded run warns about non-convergence and a
# rank-deficient fit — the one-hot columns look collinear; consider dropping
# one reference level per factor.
Adult_TDA_PC_5.40.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the fitted node-3 model: sample/predictor counts, resampling scheme
# and the resampled Accuracy / Kappa.
Adult_TDA_PC_5.40.5_n3_LrFit0
## Generalized Linear Model
##
## 11563 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7708, 7708, 7710
## Resampling results:
##
## Accuracy Kappa
## 0.8642225 0.5365206
# Per-fold resampling results for the node-3 model.
Adult_TDA_PC_5.40.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8666667 0.5433624 Fold1
## 2 0.8594034 0.5229379 Fold2
## 3 0.8665975 0.5432615 Fold3
# Keep the per-fold Accuracy column of the node-3 fit for the paired 3-fold
# comparison below. This line used to read from Adult_TDA_PC_5.40.5_n2_LrFit0
# (copy-paste slip), so the recorded node-3 "3-fold diff" values and the test
# outputs derived from them duplicate node 2 and must be regenerated.
ad_tda_pc_5.40.5_n3_lr_fit_re <- Adult_TDA_PC_5.40.5_n3_LrFit0$resample[1]
# Coefficient table and fit diagnostics of the underlying glm.
summary(Adult_TDA_PC_5.40.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (11 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.803e+13 1.163e+13 1.550 0.121182
## V1 -5.397e-03 2.909e-03 -1.855 0.063578 .
## V2.. -1.803e+13 1.163e+13 -1.550 0.121182
## V2.Federal.gov -1.803e+13 1.163e+13 -1.550 0.121182
## V2.Local.gov -1.803e+13 1.163e+13 -1.550 0.121182
## V2.Never.worked NA NA NA NA
## V2.Private -1.803e+13 1.163e+13 -1.550 0.121182
## V2.Self.emp.inc -1.803e+13 1.163e+13 -1.550 0.121182
## V2.Self.emp.not.inc -1.803e+13 1.163e+13 -1.550 0.121182
## V2.State.gov -1.803e+13 1.163e+13 -1.550 0.121182
## V2.Without.pay -1.803e+13 1.163e+13 -1.550 0.121182
## V3 1.227e-06 2.921e-07 4.200 2.67e-05 ***
## V4.10th -2.850e-02 2.121e-01 -0.134 0.893110
## V4.11th 1.232e-01 2.030e-01 0.607 0.543848
## V4.12th 3.389e-01 3.088e-01 1.098 0.272372
## V4.1st.4th -6.829e-01 5.521e-01 -1.237 0.216163
## V4.5th.6th -8.191e-01 4.082e-01 -2.006 0.044805 *
## V4.7th.8th -1.211e+00 3.186e-01 -3.801 0.000144 ***
## V4.9th -6.215e-01 2.876e-01 -2.161 0.030696 *
## V4.Assoc.acdm -7.835e-01 1.808e-01 -4.334 1.46e-05 ***
## V4.Assoc.voc -5.034e-01 1.610e-01 -3.126 0.001773 **
## V4.Bachelors -8.702e-01 1.063e-01 -8.187 2.67e-16 ***
## V4.Doctorate -3.791e-01 2.429e-01 -1.561 0.118492
## V4.HS.grad -4.174e-01 8.962e-02 -4.658 3.20e-06 ***
## V4.Masters -6.383e-01 1.401e-01 -4.556 5.21e-06 ***
## V4.Preschool -2.422e+01 8.346e+04 0.000 0.999768
## V4.Prof.school -3.844e-01 2.228e-01 -1.725 0.084507 .
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -5.764e-01 2.263e-01 -2.547 0.010852 *
## V6.Married.AF.spouse 1.472e+00 9.413e-01 1.564 0.117770
## V6.Married.civ.spouse -8.652e-01 3.720e-01 -2.326 0.020018 *
## V6.Married.spouse.absent -3.209e-01 3.452e-01 -0.930 0.352478
## V6.Never.married -3.522e-01 2.374e-01 -1.484 0.137884
## V6.Separated -4.518e-01 2.966e-01 -1.523 0.127674
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 1.802e+00 1.740e-01 10.354 < 2e-16 ***
## V7.Armed.Forces -2.462e+01 1.927e+05 0.000 0.999898
## V7.Craft.repair 1.711e-02 1.639e-01 0.104 0.916863
## V7.Exec.managerial 4.203e-01 1.637e-01 2.568 0.010219 *
## V7.Farming.fishing -1.183e+00 3.599e-01 -3.287 0.001013 **
## V7.Handlers.cleaners 9.947e-01 2.056e-01 4.838 1.31e-06 ***
## V7.Machine.op.inspct 1.095e+00 1.749e-01 6.260 3.84e-10 ***
## V7.Other.service 8.387e-01 1.956e-01 4.288 1.81e-05 ***
## V7.Priv.house.serv -2.417e+00 1.108e+01 -0.218 0.827298
## V7.Prof.specialty 3.677e-01 1.701e-01 2.162 0.030602 *
## V7.Protective.serv 4.760e-01 2.474e-01 1.924 0.054361 .
## V7.Sales 7.874e-01 1.681e-01 4.683 2.82e-06 ***
## V7.Tech.support 9.623e-01 2.226e-01 4.323 1.54e-05 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -2.068e+00 1.603e-01 -12.907 < 2e-16 ***
## V8.Not.in.family -1.824e-01 3.293e-01 -0.554 0.579636
## V8.Other.relative -6.772e-01 3.077e-01 -2.200 0.027772 *
## V8.Own.child -6.956e-01 3.208e-01 -2.169 0.030104 *
## V8.Unmarried 1.441e-01 3.491e-01 0.413 0.679685
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 8.046e-01 3.025e-01 2.660 0.007817 **
## V9.Asian.Pac.Islander 9.112e-01 2.493e-01 3.656 0.000257 ***
## V9.Black 1.430e+00 1.111e-01 12.868 < 2e-16 ***
## V9.Other 9.549e-01 3.565e-01 2.679 0.007388 **
## V9.White NA NA NA NA
## V10.Female 1.851e+00 1.321e-01 14.010 < 2e-16 ***
## V10.Male NA NA NA NA
## V11 2.780e-04 1.688e-05 16.471 < 2e-16 ***
## V12 2.364e-04 6.185e-05 3.822 0.000132 ***
## V13 -3.632e-03 2.811e-03 -1.292 0.196316
## V14.. -1.719e+00 1.009e+00 -1.704 0.088314 .
## V14.Cambodia -1.515e-01 1.253e+00 -0.121 0.903803
## V14.Canada -1.618e+00 1.087e+00 -1.489 0.136590
## V14.China -2.803e+00 1.173e+00 -2.389 0.016883 *
## V14.Columbia -2.356e+00 1.475e+00 -1.597 0.110170
## V14.Cuba -5.173e-01 1.097e+00 -0.472 0.637207
## V14.Dominican.Republic -3.042e+00 1.545e+00 -1.969 0.048975 *
## V14.Ecuador -1.109e+00 1.474e+00 -0.753 0.451689
## V14.El.Salvador -1.602e+00 1.218e+00 -1.315 0.188578
## V14.England -1.419e+00 1.096e+00 -1.295 0.195293
## V14.France -1.199e+00 1.294e+00 -0.927 0.354138
## V14.Germany -1.254e+00 1.086e+00 -1.155 0.248162
## V14.Greece -3.113e+00 1.389e+00 -2.242 0.024975 *
## V14.Guatemala -1.501e+00 1.305e+00 -1.150 0.250119
## V14.Haiti -6.966e-01 1.222e+00 -0.570 0.568568
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras 2.428e+01 4.019e+05 0.000 0.999952
## V14.Hong -1.949e+00 1.851e+00 -1.053 0.292456
## V14.Hungary -2.239e+00 1.578e+00 -1.419 0.155853
## V14.India -2.462e+00 1.131e+00 -2.176 0.029532 *
## V14.Iran -2.686e+00 1.278e+00 -2.102 0.035544 *
## V14.Ireland -9.362e-01 1.445e+00 -0.648 0.517014
## V14.Italy -9.952e-01 1.135e+00 -0.877 0.380494
## V14.Jamaica -5.144e-01 1.116e+00 -0.461 0.644966
## V14.Japan -1.422e+00 1.207e+00 -1.177 0.239045
## V14.Laos -2.942e+00 1.689e+00 -1.742 0.081514 .
## V14.Mexico -1.828e+00 1.024e+00 -1.785 0.074325 .
## V14.Nicaragua -2.240e+00 1.555e+00 -1.441 0.149604
## V14.Outlying.US.Guam.USVI.etc. -2.554e+01 1.673e+05 0.000 0.999878
## V14.Peru -9.369e-01 1.485e+00 -0.631 0.528212
## V14.Philippines -9.813e-01 1.056e+00 -0.929 0.352967
## V14.Poland -8.324e-01 1.203e+00 -0.692 0.489025
## V14.Portugal -1.973e+00 1.584e+00 -1.245 0.212966
## V14.Puerto.Rico -1.126e+00 1.106e+00 -1.019 0.308382
## V14.Scotland -3.174e+00 1.876e+00 -1.692 0.090634 .
## V14.South -2.803e+00 1.192e+00 -2.351 0.018708 *
## V14.Taiwan -1.450e+00 1.248e+00 -1.161 0.245556
## V14.Thailand -2.264e+00 1.670e+00 -1.356 0.175204
## V14.Trinadad.Tobago -1.254e+00 1.312e+00 -0.955 0.339442
## V14.United.States -1.480e+00 9.856e-01 -1.502 0.133181
## V14.Vietnam -2.058e+00 1.241e+00 -1.659 0.097165 .
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 11831.9 on 11562 degrees of freedom
## Residual deviance: 7317.2 on 11465 degrees of freedom
## AIC: 7513.2
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-3 model; the second argument (50) is
# presumably the number of features to display — confirm against vip().
# The plot title spells "Assisted" correctly (it previously read "Assited").
vip(Adult_TDA_PC_5.40.5_n3_LrFit0, 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n3_Lr1Fit TDA-Assisted LR")

# Score the held-out test frame with the node-3 model. `pred0` is a scratch
# name that is reassigned in each node section.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Cross-tabulate the predictions against the observed test labels.
ad_tda_pc_5.40.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3896 1821
## >50K 3520 531
##
## Accuracy : 0.4532
## 95% CI : (0.4433, 0.4632)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1996
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5254
## Specificity : 0.2258
## Pos Pred Value : 0.6815
## Neg Pred Value : 0.1311
## Prevalence : 0.7592
## Detection Rate : 0.3989
## Detection Prevalence : 0.5853
## Balanced Accuracy : 0.3756
##
## 'Positive' Class : <=50K
##
# Prints the same confusion-matrix object a second time; nothing modifies it
# between the two prints, so this output repeats the one shown above.
ad_tda_pc_5.40.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 3896 1821
## >50K 3520 531
##
## Accuracy : 0.4532
## 95% CI : (0.4433, 0.4632)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1996
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.5254
## Specificity : 0.2258
## Pos Pred Value : 0.6815
## Neg Pred Value : 0.1311
## Prevalence : 0.7592
## Detection Rate : 0.3989
## Detection Prevalence : 0.5853
## Balanced Accuracy : 0.3756
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics of the node-3 confusion matrix.
ad_tda_pc_5.40.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.532146e-01 -1.996451e-01 4.433066e-01 4.631505e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 2.058531e-119
# Keep the Accuracy entry (first element above) for the test-set comparison.
ad_tda_pc_5.40.5_n3_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_lr_cf0$overall["Accuracy"]
# Per-class statistics of the node-3 confusion matrix.
ad_tda_pc_5.40.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.5253506 0.2257653 0.6814763
## Neg Pred Value Precision Recall
## 0.1310787 0.6814763 0.5253506
## F1 Prevalence Detection Rate
## 0.5933146 0.7592138 0.3988534
## Detection Prevalence Balanced Accuracy
## 0.5852785 0.3755579
# Precision, Recall and F1 are entries 5-7 of byClass in the printout above;
# select them by name.
ad_tda_pc_5.40.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Per-fold accuracy difference: ad_lr_fit_re (presumably the non-TDA baseline
# LR — confirm) minus the folds stored in ad_tda_pc_5.40.5_n3_lr_fit_re.
diff_tda_pca_5.40.5_lr_n3_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.40.5_n3_lr_fit_re
diff_tda_pca_5.40.5_lr_n3_3_fold
## Accuracy
## 1 0.1376514
## 2 0.2485839
## 3 0.1541169
## Bayesian Tests 3-fold diff
# Bayesian Sign Test on the per-fold differences; the trailing -0.01 / 0.01
# are presumably the ROPE bounds (the result reports left / rope / right).
bst_tda_pca_5.40.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test: probLeft divided by probRight.
bst_tda_pca_5.40.5_lr.n3_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n3_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test on the per-fold differences, with the bounds
# -0.01 and 0.01.
# NOTE(review): the result appears to be simulation-based and no seed is set
# here — TODO confirm and consider set.seed() for reproducibility.
bsr_tda_pca_5.40.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.0077
##
## $winRight
## [1] 0.9923
# Bayesian Correlated Test on the per-fold differences.
# NOTE(review): the second argument (0.1) is presumably the correlation rho;
# the usual heuristic for k-fold CV is 1/k (1/3 for the 3 folds used here) —
# confirm 0.1 is intended.
bct_tda_pca_5.40.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n3_3_fold
## $left
## [1] 0.02067505
##
## $rope
## [1] 0.004760408
##
## $right
## [1] 0.9745645
# Rope Plot: plot the rope() result for the per-fold differences, using the
# interval c(-0.01, 0.01).
plot(
  rope(
    diff_tda_pca_5.40.5_lr_n3_3_fold,
    c(-0.01, 0.01)
  )
)

#BayesFactor
#bf_tda_pca_5.40.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold))
#bf_tda_pca_5.40.5_lr.n3_3_fold
# t_test: one-sample t-test of the per-fold differences (the recorded output
# tests against a true mean of 0).
t.test(
  as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_lr_n3_3_fold)
## t = 5.2115, df = 2, p-value = 0.0349
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.03141071 0.32882410
## sample estimates:
## mean of x
## 0.1801174
### Test set diff
# Single test-set accuracy difference: lr_cf_ov_acc minus the node-3
# TDA-assisted accuracy (positive means the first operand scored higher).
diff_tda_pca_5.40.5_lr.n3_test <-
  lr_cf_ov_acc - ad_tda_pc_5.40.5_n3_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n3_test
## Accuracy
## 0.4000819
## Bayesian Tests Test set diff
# Bayesian Sign Test on the test-set difference (a single value here), with
# the bounds -0.01 and 0.01.
bst_tda_pca_5.40.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test (test set): probLeft divided by probRight.
bst_tda_pca_5.40.5_lr.n3_test_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n3_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test on the test-set difference, with the bounds
# -0.01 and 0.01.
bsr_tda_pca_5.40.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1607333
##
## $winRight
## [1] 0.8392667
# Bayesian Correlated Test on the test-set difference.
# NOTE(review): the recorded result is NA for left / rope / right, presumably
# because a single difference has no sample variance — this test needs
# several paired differences to be informative.
bct_tda_pca_5.40.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_lr.n3_test))
#BayesFactor
#bf_tda_pca_5.40.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n3_test))
#bf_tda_pca_5.40.5_lr.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n3_test))
## Node 4
# Logistic regression (caret method "glm", binomial family) of adult_df1 on
# every other column of the node-4 data; resampling follows fitControl, which
# is defined elsewhere, and models are compared on accuracy.
# NOTE(review): the recorded run emitted non-convergence and rank-deficiency
# warnings for this fit.
Adult_TDA_PC_5.40.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Resampling summary of the node-4 fit
Adult_TDA_PC_5.40.5_n4_LrFit0
## Generalized Linear Model
##
## 14818 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9878, 9879, 9879
## Resampling results:
##
## Accuracy Kappa
## 0.9437159 0.1719718
# Per-fold metrics
Adult_TDA_PC_5.40.5_n4_LrFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9621457 0.29666063 Fold1
## 2 0.9591010 0.12915683 Fold2
## 3 0.9099008 0.09009785 Fold3
# Keep the per-fold accuracy as a one-column data frame for the paired tests
ad_tda_pc_5.40.5_n4_lr_fit_re <-
  Adult_TDA_PC_5.40.5_n4_LrFit0[["resample"]]["Accuracy"]
# Coefficient table of the fitted model
summary(Adult_TDA_PC_5.40.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 6.412e+11 9.399e+12 0.068 0.945608
## V1 1.519e-02 4.377e-03 3.472 0.000517 ***
## V2.. -6.412e+11 9.399e+12 -0.068 0.945608
## V2.Federal.gov -6.412e+11 9.399e+12 -0.068 0.945608
## V2.Local.gov -6.412e+11 9.399e+12 -0.068 0.945608
## V2.Never.worked -6.412e+11 9.399e+12 -0.068 0.945608
## V2.Private -6.412e+11 9.399e+12 -0.068 0.945608
## V2.Self.emp.inc -6.412e+11 9.399e+12 -0.068 0.945608
## V2.Self.emp.not.inc -6.412e+11 9.399e+12 -0.068 0.945608
## V2.State.gov -6.412e+11 9.399e+12 -0.068 0.945608
## V2.Without.pay -6.412e+11 9.399e+12 -0.068 0.945608
## V3 7.350e-07 4.291e-07 1.713 0.086754 .
## V4.10th -1.605e+00 6.216e-01 -2.581 0.009839 **
## V4.11th 4.964e-02 3.183e-01 0.156 0.876068
## V4.12th -3.181e-01 5.001e-01 -0.636 0.524736
## V4.1st.4th -2.254e+01 2.839e+04 -0.001 0.999367
## V4.5th.6th -2.777e-01 6.642e-01 -0.418 0.675875
## V4.7th.8th -9.178e-01 5.493e-01 -1.671 0.094740 .
## V4.9th 2.554e-01 4.222e-01 0.605 0.545301
## V4.Assoc.acdm -2.233e-01 2.381e-01 -0.938 0.348265
## V4.Assoc.voc -1.595e-01 2.252e-01 -0.708 0.478793
## V4.Bachelors 2.113e-01 1.442e-01 1.465 0.142957
## V4.Doctorate 8.018e-01 4.790e-01 1.674 0.094127 .
## V4.HS.grad -3.442e-01 1.284e-01 -2.682 0.007325 **
## V4.Masters 3.040e-01 2.173e-01 1.399 0.161788
## V4.Preschool -4.478e+01 1.134e+07 0.000 0.999997
## V4.Prof.school 3.716e-02 5.252e-01 0.071 0.943592
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced 1.835e-01 2.169e-01 0.846 0.397702
## V6.Married.AF.spouse 3.061e+00 8.153e-01 3.755 0.000174 ***
## V6.Married.civ.spouse 1.740e+00 4.737e-01 3.674 0.000239 ***
## V6.Married.spouse.absent -1.296e-01 4.366e-01 -0.297 0.766550
## V6.Never.married 1.012e-01 2.414e-01 0.419 0.675094
## V6.Separated -1.769e-01 3.169e-01 -0.558 0.576551
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 3.451e-01 3.334e-01 1.035 0.300586
## V7.Armed.Forces -2.248e+01 1.290e+05 0.000 0.999861
## V7.Craft.repair -3.170e-02 3.674e-01 -0.086 0.931258
## V7.Exec.managerial 3.084e-01 3.419e-01 0.902 0.367074
## V7.Farming.fishing -2.030e+00 1.036e+00 -1.960 0.049999 *
## V7.Handlers.cleaners -2.751e-01 4.662e-01 -0.590 0.555156
## V7.Machine.op.inspct -6.153e-01 4.103e-01 -1.500 0.133726
## V7.Other.service -1.293e-02 3.505e-01 -0.037 0.970582
## V7.Priv.house.serv -3.929e+00 2.541e+00 -1.546 0.122043
## V7.Prof.specialty 2.244e-01 3.471e-01 0.646 0.517985
## V7.Protective.serv 4.390e-01 5.046e-01 0.870 0.384254
## V7.Sales 8.373e-02 3.482e-01 0.240 0.809977
## V7.Tech.support 6.557e-02 3.973e-01 0.165 0.868918
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.930e+01 9.024e+02 -0.021 0.982935
## V8.Not.in.family 5.919e-02 4.361e-01 0.136 0.892026
## V8.Other.relative -1.111e+00 4.414e-01 -2.517 0.011835 *
## V8.Own.child -1.011e+00 4.228e-01 -2.390 0.016847 *
## V8.Unmarried 6.885e-02 4.485e-01 0.154 0.877994
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 7.619e-01 3.593e-01 2.121 0.033929 *
## V9.Asian.Pac.Islander 8.478e-01 3.049e-01 2.781 0.005420 **
## V9.Black 4.978e-01 1.444e-01 3.447 0.000567 ***
## V9.Other 2.358e-01 5.227e-01 0.451 0.651914
## V9.White NA NA NA NA
## V10.Female 9.009e-01 1.337e-01 6.737 1.61e-11 ***
## V10.Male NA NA NA NA
## V11 3.732e-04 1.941e-05 19.221 < 2e-16 ***
## V12 3.180e-04 1.091e-04 2.915 0.003556 **
## V13 2.336e-02 4.150e-03 5.629 1.81e-08 ***
## V14.. -2.485e+00 1.370e+00 -1.814 0.069683 .
## V14.Cambodia -2.263e+01 3.387e+04 -0.001 0.999467
## V14.Canada -2.005e+00 1.468e+00 -1.365 0.172200
## V14.China -2.234e+00 1.520e+00 -1.470 0.141654
## V14.Columbia -2.491e+01 5.734e+04 0.000 0.999653
## V14.Cuba -3.219e+00 1.691e+00 -1.904 0.056929 .
## V14.Dominican.Republic -2.280e+00 1.699e+00 -1.341 0.179766
## V14.Ecuador -2.494e+01 1.008e+05 0.000 0.999803
## V14.El.Salvador -2.457e+01 4.202e+04 -0.001 0.999534
## V14.England -1.905e+00 1.440e+00 -1.323 0.185849
## V14.France -2.351e+00 1.805e+00 -1.303 0.192718
## V14.Germany -2.326e+00 1.454e+00 -1.600 0.109585
## V14.Greece -1.386e+00 1.768e+00 -0.784 0.433012
## V14.Guatemala -2.640e-02 1.533e+00 -0.017 0.986258
## V14.Haiti -2.542e+00 1.728e+00 -1.471 0.141333
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.488e+01 1.078e+05 0.000 0.999816
## V14.Hong -2.643e+01 9.444e+04 0.000 0.999777
## V14.Hungary -1.335e+00 1.760e+00 -0.758 0.448256
## V14.India -2.101e+00 1.569e+00 -1.339 0.180475
## V14.Iran -2.531e+01 8.628e+04 0.000 0.999766
## V14.Ireland -1.500e+00 1.699e+00 -0.883 0.377335
## V14.Italy -2.374e+00 1.702e+00 -1.395 0.163066
## V14.Jamaica -2.420e+00 1.731e+00 -1.398 0.162041
## V14.Japan -1.050e+00 1.466e+00 -0.716 0.473999
## V14.Laos -2.382e+00 1.781e+00 -1.337 0.181208
## V14.Mexico -2.643e+00 1.426e+00 -1.853 0.063902 .
## V14.Nicaragua -2.430e+01 7.563e+04 0.000 0.999744
## V14.Outlying.US.Guam.USVI.etc. -2.591e+01 9.923e+04 0.000 0.999792
## V14.Peru -2.528e+01 9.419e+04 0.000 0.999786
## V14.Philippines -2.829e+00 1.443e+00 -1.960 0.050050 .
## V14.Poland -2.200e+00 1.686e+00 -1.305 0.192010
## V14.Portugal -1.713e+00 1.748e+00 -0.980 0.326910
## V14.Puerto.Rico -2.029e+00 1.470e+00 -1.380 0.167657
## V14.Scotland -2.593e+01 1.333e+05 0.000 0.999845
## V14.South -2.926e+00 1.570e+00 -1.864 0.062332 .
## V14.Taiwan -1.801e+00 1.558e+00 -1.156 0.247838
## V14.Thailand -2.629e+01 1.105e+05 0.000 0.999810
## V14.Trinadad.Tobago -2.562e+01 1.165e+05 0.000 0.999825
## V14.United.States -2.316e+00 1.323e+00 -1.751 0.080005 .
## V14.Vietnam -2.168e+00 1.616e+00 -1.341 0.179941
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5218.0 on 14817 degrees of freedom
## Residual deviance: 3811.1 on 14719 degrees of freedom
## AIC: 4009.1
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-4 fit, showing up to 50 features
vip(Adult_TDA_PC_5.40.5_n4_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n4_Lr1Fit TDA-Assited LR")

# Score the held-out test set with the node-4 model
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Compare the predictions with the true test labels
ad_tda_pc_5.40.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
# Held-out confusion matrix for the node-4 model. It is printed once here; the
# script previously printed the identical object twice back to back.
ad_tda_pc_5.40.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7408 2188
## >50K 8 164
##
## Accuracy : 0.7752
## 95% CI : (0.7668, 0.7834)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0001051
##
## Kappa : 0.1004
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.99892
## Specificity : 0.06973
## Pos Pred Value : 0.77199
## Neg Pred Value : 0.95349
## Prevalence : 0.75921
## Detection Rate : 0.75839
## Detection Prevalence : 0.98239
## Balanced Accuracy : 0.53432
##
## 'Positive' Class : <=50K
##
# Overall statistics
ad_tda_pc_5.40.5_n4_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7751842752 0.1004311785 0.7667730166 0.7834311474 0.7592137592
## AccuracyPValue McnemarPValue
## 0.0001051039 0.0000000000
# Select statistics by name rather than by position, so the extraction does
# not silently pick a different statistic if the stored order ever changes.
ad_tda_pc_5.40.5_n4_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.40.5_n4_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.99892125 0.06972789 0.77198833
## Neg Pred Value Precision Recall
## 0.95348837 0.77198833 0.99892125
## F1 Prevalence Detection Rate
## 0.87091465 0.75921376 0.75839476
## Detection Prevalence Balanced Accuracy
## 0.98239148 0.53432457
ad_tda_pc_5.40.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR (node 4)
### 3-fold difference
# Per-fold accuracy of the reference LR minus that of the node-4 model;
# negative values mean the node-4 model scored higher on that fold.
diff_tda_pca_5.40.5_lr_n4_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.40.5_n4_lr_fit_re
diff_tda_pca_5.40.5_lr_n4_3_fold
## Accuracy
## 1 -0.1152123
## 2 -0.1032179
## 3 -0.0594335
## Bayesian tests on the 3-fold difference
# Bayesian sign test (the two trailing arguments are presumably the ROPE bounds)
bst_tda_pca_5.40.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right"; Inf in the recorded run because probRight is 0
bst_tda_pca_5.40.5_lr.n4_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n4_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same bounds
bsr_tda_pca_5.40.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n4_3_fold
## $winLeft
## [1] 0.9904667
##
## $winRope
## [1] 0.009533333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The second argument is taken to be the assumed correlation (rho) between
# fold-wise differences; the recorded output is consistent with that reading.
# Under the Nadeau-Bengio heuristic rho = n_test / n_total, i.e. 1 / k for
# k-fold cross-validation. These differences come from 3 folds, so rho = 1 / 3;
# the previous value of 0.1 corresponds to 10-fold CV and understates the
# posterior spread. The trailing two arguments are the ROPE bounds.
bct_tda_pca_5.40.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n4_3_fold
# NOTE: the output recorded below was produced with rho = 0.1; re-run to refresh.
## $left
## [1] 0.9740985
##
## $rope
## [1] 0.008647264
##
## $right
## [1] 0.01725419
# ROPE plot for the node-4 fold-wise differences, using the region [-0.01, 0.01]
plot(
  rope(
    diff_tda_pca_5.40.5_lr_n4_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (disabled in the original analysis)
#bf_tda_pca_5.40.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold))
#bf_tda_pca_5.40.5_lr.n4_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(x = as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_lr_n4_3_fold)
## t = -5.464, df = 2, p-value = 0.0319
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.16555668 -0.01968582
## sample estimates:
## mean of x
## -0.09262125
### Test-set difference (node 4)
# Held-out accuracy of the reference LR model minus that of the node-4
# TDA-assisted LR model; a positive value means the reference scored higher.
diff_tda_pca_5.40.5_lr.n4_test <-
  lr_cf_ov_acc - ad_tda_pc_5.40.5_n4_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n4_test
## Accuracy
## 0.0781122
## Bayesian tests on the test-set difference
# Bayesian sign test (the two trailing arguments are presumably the ROPE bounds)
bst_tda_pca_5.40.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test
bst_tda_pca_5.40.5_lr.n4_test_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n4_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test with the same bounds
bsr_tda_pca_5.40.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1587
##
## $winRight
## [1] 0.8413
# Correlated Bayesian t-test on the single test-set difference (node 4).
# NOTE(review): every component of the recorded result is NA, presumably
# because one observation has no sample standard deviation - confirm.
bct_tda_pca_5.40.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_pca_5.40.5_lr.n4_test)))
# Bayes factor (disabled)
#bf_tda_pca_5.40.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n4_test)) #bf_tda_pca_5.40.5_lr.n4_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n4_test))
## Node 5
# Logistic regression (caret method "glm", binomial family) of adult_df1 on
# every other column of the node-5 data; resampling follows fitControl, which
# is defined elsewhere, and models are compared on accuracy.
# NOTE(review): the recorded run emitted non-convergence and rank-deficiency
# warnings for this fit.
Adult_TDA_PC_5.40.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Resampling summary of the node-5 fit
Adult_TDA_PC_5.40.5_n5_LrFit0
## Generalized Linear Model
##
## 12081 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8054, 8054, 8054
## Resampling results:
##
## Accuracy Kappa
## 0.9834451 0.002863384
# Per-fold metrics
Adult_TDA_PC_5.40.5_n5_LrFit0[["resample"]]
## Accuracy Kappa Resample
## 1 0.9990067 -0.0003726245 Fold1
## 2 0.9990067 -0.0003726245 Fold2
## 3 0.9523218 0.0093354005 Fold3
# Keep the per-fold accuracy as a one-column data frame for the paired tests
ad_tda_pc_5.40.5_n5_lr_fit_re <-
  Adult_TDA_PC_5.40.5_n5_LrFit0[["resample"]]["Accuracy"]
# Coefficient table of the fitted model
summary(Adult_TDA_PC_5.40.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (9 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.528e+23 1.346e+16 18776801 <2e-16 ***
## V1 2.389e+13 6.427e+04 371651628 <2e-16 ***
## V2.. 1.154e+22 2.314e+14 49864713 <2e-16 ***
## V2.Federal.gov 1.154e+22 2.314e+14 49864730 <2e-16 ***
## V2.Local.gov 1.154e+22 2.314e+14 49864715 <2e-16 ***
## V2.Never.worked 1.154e+22 2.314e+14 49864730 <2e-16 ***
## V2.Private 1.154e+22 2.314e+14 49864724 <2e-16 ***
## V2.Self.emp.inc 1.154e+22 2.314e+14 49864716 <2e-16 ***
## V2.Self.emp.not.inc 1.154e+22 2.314e+14 49864721 <2e-16 ***
## V2.State.gov 1.154e+22 2.314e+14 49864730 <2e-16 ***
## V2.Without.pay 1.154e+22 2.314e+14 49864725 <2e-16 ***
## V3 7.745e+08 5.323e+00 145516729 <2e-16 ***
## V4.10th -1.234e+14 3.167e+06 -38952445 <2e-16 ***
## V4.11th -1.478e+15 2.721e+06 -543256694 <2e-16 ***
## V4.12th 8.749e+14 4.164e+06 210120087 <2e-16 ***
## V4.1st.4th -3.973e+14 7.756e+06 -51217457 <2e-16 ***
## V4.5th.6th 1.890e+15 5.879e+06 321479764 <2e-16 ***
## V4.7th.8th -5.671e+14 4.611e+06 -122975228 <2e-16 ***
## V4.9th 1.104e+15 4.415e+06 250073194 <2e-16 ***
## V4.Assoc.acdm 1.453e+15 3.633e+06 400118486 <2e-16 ***
## V4.Assoc.voc -1.893e+15 3.505e+06 -540294728 <2e-16 ***
## V4.Bachelors -2.043e+15 2.594e+06 -787806522 <2e-16 ***
## V4.Doctorate 5.261e+14 3.879e+07 13560647 <2e-16 ***
## V4.HS.grad 8.536e+14 1.554e+06 549287185 <2e-16 ***
## V4.Masters 5.098e+14 6.897e+06 73916108 <2e-16 ***
## V4.Preschool -7.942e+14 1.168e+07 -67976968 <2e-16 ***
## V4.Prof.school -1.694e+15 2.395e+07 -70706865 <2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.643e+23 1.351e+16 -19568906 <2e-16 ***
## V6.Married.AF.spouse -2.643e+23 1.351e+16 -19568905 <2e-16 ***
## V6.Married.civ.spouse -2.643e+23 1.351e+16 -19568906 <2e-16 ***
## V6.Married.spouse.absent -2.643e+23 1.351e+16 -19568906 <2e-16 ***
## V6.Never.married -2.643e+23 1.351e+16 -19568906 <2e-16 ***
## V6.Separated -2.643e+23 1.351e+16 -19568906 <2e-16 ***
## V6.Widowed -2.643e+23 1.351e+16 -19568906 <2e-16 ***
## V7.. NA NA NA NA
## V7.Adm.clerical -3.265e+14 4.647e+06 -70257398 <2e-16 ***
## V7.Armed.Forces -4.665e+15 4.787e+07 -97450463 <2e-16 ***
## V7.Craft.repair 2.108e+14 4.902e+06 43005445 <2e-16 ***
## V7.Exec.managerial 1.235e+14 5.240e+06 23565555 <2e-16 ***
## V7.Farming.fishing 2.963e+14 6.693e+06 44271086 <2e-16 ***
## V7.Handlers.cleaners 3.236e+14 4.612e+06 70165336 <2e-16 ***
## V7.Machine.op.inspct -1.470e+15 4.973e+06 -295567434 <2e-16 ***
## V7.Other.service 4.414e+13 4.590e+06 9617761 <2e-16 ***
## V7.Priv.house.serv -3.686e+14 7.296e+06 -50516843 <2e-16 ***
## V7.Prof.specialty -2.620e+14 5.270e+06 -49719442 <2e-16 ***
## V7.Protective.serv 6.283e+13 8.165e+06 7695092 <2e-16 ***
## V7.Sales 1.131e+15 4.784e+06 236460930 <2e-16 ***
## V7.Tech.support 1.395e+15 5.826e+06 239435477 <2e-16 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband NA NA NA NA
## V8.Not.in.family 3.482e+15 9.682e+06 359616643 <2e-16 ***
## V8.Other.relative 4.885e+15 9.668e+06 505245465 <2e-16 ***
## V8.Own.child 3.720e+15 9.637e+06 385960479 <2e-16 ***
## V8.Unmarried 3.304e+15 9.742e+06 339125425 <2e-16 ***
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.717e+14 5.239e+06 -32771224 <2e-16 ***
## V9.Asian.Pac.Islander -1.508e+14 5.099e+06 -29577939 <2e-16 ***
## V9.Black -3.632e+14 1.702e+06 -213386456 <2e-16 ***
## V9.Other -1.156e+13 5.428e+06 -2129090 <2e-16 ***
## V9.White NA NA NA NA
## V10.Female 4.905e+14 1.604e+06 305773612 <2e-16 ***
## V10.Male NA NA NA NA
## V11 -3.734e+11 7.408e+02 -504091032 <2e-16 ***
## V12 -9.507e+11 2.509e+03 -378913694 <2e-16 ***
## V13 -3.690e+10 5.427e+04 -679912 <2e-16 ***
## V14.. 3.980e+12 3.910e+07 101777 <2e-16 ***
## V14.Cambodia 1.201e+15 4.931e+07 24358909 <2e-16 ***
## V14.Canada -4.481e+14 4.053e+07 -11055459 <2e-16 ***
## V14.China -1.591e+15 4.220e+07 -37706892 <2e-16 ***
## V14.Columbia -7.221e+14 4.039e+07 -17876415 <2e-16 ***
## V14.Cuba -8.205e+14 4.034e+07 -20337752 <2e-16 ***
## V14.Dominican.Republic -3.375e+14 3.999e+07 -8439025 <2e-16 ***
## V14.Ecuador -7.987e+14 4.345e+07 -18383118 <2e-16 ***
## V14.El.Salvador -1.195e+15 3.964e+07 -30145689 <2e-16 ***
## V14.England -1.332e+15 4.090e+07 -32579623 <2e-16 ***
## V14.France -1.045e+14 4.554e+07 -2295046 <2e-16 ***
## V14.Germany -5.695e+14 3.988e+07 -14279993 <2e-16 ***
## V14.Greece 6.868e+14 5.484e+07 12523143 <2e-16 ***
## V14.Guatemala -5.692e+14 4.005e+07 -14214327 <2e-16 ***
## V14.Haiti -2.853e+14 4.052e+07 -7040838 <2e-16 ***
## V14.Holand.Netherlands 2.722e+15 7.779e+07 34992795 <2e-16 ***
## V14.Honduras -2.491e+15 4.432e+07 -56207905 <2e-16 ***
## V14.Hong 8.131e+14 4.773e+07 17035327 <2e-16 ***
## V14.Hungary -2.933e+15 5.490e+07 -53419238 <2e-16 ***
## V14.India -4.417e+14 4.213e+07 -10483773 <2e-16 ***
## V14.Iran 6.464e+13 4.749e+07 1361005 <2e-16 ***
## V14.Ireland -2.555e+14 4.480e+07 -5702740 <2e-16 ***
## V14.Italy -5.468e+14 4.213e+07 -12979504 <2e-16 ***
## V14.Jamaica 7.952e+11 3.990e+07 19932 <2e-16 ***
## V14.Japan -5.812e+14 4.215e+07 -13791219 <2e-16 ***
## V14.Laos 1.176e+15 4.455e+07 26404737 <2e-16 ***
## V14.Mexico -1.325e+15 3.902e+07 -33944214 <2e-16 ***
## V14.Nicaragua -5.289e+14 4.176e+07 -12664350 <2e-16 ***
## V14.Outlying.US.Guam.USVI.etc. -8.388e+13 4.425e+07 -1895814 <2e-16 ***
## V14.Peru 1.525e+14 4.161e+07 3664779 <2e-16 ***
## V14.Philippines -6.837e+14 3.982e+07 -17171190 <2e-16 ***
## V14.Poland -1.525e+15 4.250e+07 -35894641 <2e-16 ***
## V14.Portugal 6.529e+14 4.279e+07 15257512 <2e-16 ***
## V14.Puerto.Rico -3.411e+14 3.965e+07 -8602395 <2e-16 ***
## V14.Scotland 1.643e+15 5.484e+07 29963584 <2e-16 ***
## V14.South -1.209e+14 4.095e+07 -2951861 <2e-16 ***
## V14.Taiwan 2.964e+15 4.262e+07 69540675 <2e-16 ***
## V14.Thailand -1.328e+15 4.505e+07 -29470571 <2e-16 ***
## V14.Trinadad.Tobago 3.008e+14 4.426e+07 6797066 <2e-16 ***
## V14.United.States -3.569e+14 3.881e+07 -9196141 <2e-16 ***
## V14.Vietnam 5.029e+14 4.053e+07 12408760 <2e-16 ***
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 72.103 on 12080 degrees of freedom
## Residual deviance: 142804.955 on 11981 degrees of freedom
## AIC: 143005
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 fit, showing up to 50 features
vip(Adult_TDA_PC_5.40.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_PCA_5.40.5_n5_Lr1Fit TDA-Assited LR")

# Score the held-out test set with the node-5 model
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Compare the predictions with the true test labels
ad_tda_pc_5.40.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
# Held-out confusion matrix for the node-5 model. It is printed once here; the
# script previously printed the identical object twice back to back.
ad_tda_pc_5.40.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6674 2317
## >50K 742 35
##
## Accuracy : 0.6868
## 95% CI : (0.6775, 0.696)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : -0.1104
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.89995
## Specificity : 0.01488
## Pos Pred Value : 0.74230
## Neg Pred Value : 0.04505
## Prevalence : 0.75921
## Detection Rate : 0.68325
## Detection Prevalence : 0.92045
## Balanced Accuracy : 0.45741
##
## 'Positive' Class : <=50K
##
# Overall statistics
ad_tda_pc_5.40.5_n5_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.868346e-01 -1.104181e-01 6.775314e-01 6.960262e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.000000e+00 3.803777e-178
# Select statistics by name rather than by position, so the extraction does
# not silently pick a different statistic if the stored order ever changes.
ad_tda_pc_5.40.5_n5_lr_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n5_lr_cf0$overall["Accuracy"]
# Per-class statistics
ad_tda_pc_5.40.5_n5_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.89994606 0.01488095 0.74229785
## Neg Pred Value Precision Recall
## 0.04504505 0.74229785 0.89994606
## F1 Prevalence Detection Rate
## 0.81355519 0.75921376 0.68325143
## Detection Prevalence Balanced Accuracy
## 0.92045455 0.45741351
ad_tda_pc_5.40.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Initial Bayesian tests: non-TDA-assisted LR vs. TDA-assisted LR (node 5)
### 3-fold difference
# Per-fold accuracy of the reference LR minus that of the node-5 model;
# negative values mean the node-5 model scored higher on that fold.
diff_tda_pca_5.40.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_pc_5.40.5_n5_lr_fit_re
diff_tda_pca_5.40.5_lr_n5_3_fold
## Accuracy
## 1 -0.1520733
## 2 -0.1431236
## 3 -0.1018545
## Bayesian tests on the 3-fold difference
# Bayesian sign test (the two trailing arguments are presumably the ROPE bounds)
bst_tda_pca_5.40.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right"; Inf in the recorded run because probRight is 0
bst_tda_pca_5.40.5_lr.n5_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n5_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same bounds
bsr_tda_pca_5.40.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n5_3_fold
## $winLeft
## [1] 0.9903667
##
## $winRope
## [1] 0.009633333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# The second argument is taken to be the assumed correlation (rho) between
# fold-wise differences; the recorded output is consistent with that reading.
# Under the Nadeau-Bengio heuristic rho = n_test / n_total, i.e. 1 / k for
# k-fold cross-validation. These differences come from 3 folds, so rho = 1 / 3;
# the previous value of 0.1 corresponds to 10-fold CV and understates the
# posterior spread. The trailing two arguments are the ROPE bounds.
bct_tda_pca_5.40.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold),
  1 / 3,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n5_3_fold
# NOTE: the output recorded below was produced with rho = 0.1; re-run to refresh.
## $left
## [1] 0.9896771
##
## $rope
## [1] 0.002635091
##
## $right
## [1] 0.007687766
# ROPE plot for the node-5 fold-wise differences, using the region [-0.01, 0.01]
plot(
  rope(
    diff_tda_pca_5.40.5_lr_n5_3_fold,
    range = c(-0.01, 0.01)
  )
)

# Bayes factor (disabled in the original analysis)
#bf_tda_pca_5.40.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold))
#bf_tda_pca_5.40.5_lr.n5_3_fold
# One-sample t-test of the fold-wise differences against a mean of zero
t.test(x = as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_lr_n5_3_fold)
## t = -8.5579, df = 2, p-value = 0.01338
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.19889226 -0.06580869
## sample estimates:
## mean of x
## -0.1323505
### Test-set difference (node 5)
# Held-out accuracy of the reference LR model minus that of the node-5
# TDA-assisted LR model; a positive value means the reference scored higher.
diff_tda_pca_5.40.5_lr.n5_test <-
  lr_cf_ov_acc - ad_tda_pc_5.40.5_n5_lr_cf0_ov_acc
diff_tda_pca_5.40.5_lr.n5_test
## Accuracy
## 0.1664619
## Bayesian tests on the test-set difference
# Bayesian sign test (the two trailing arguments are presumably the ROPE bounds)
bst_tda_pca_5.40.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_lr.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test
bst_tda_pca_5.40.5_lr.n5_test_odds.left <- with(
  bst_tda_pca_5.40.5_lr.n5_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_lr.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test with the same bounds
bsr_tda_pca_5.40.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_lr.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1602
##
## $winRight
## [1] 0.8398
# Correlated Bayesian t-test on the single test-set difference (node 5).
# NOTE(review): every component of the recorded result is NA, presumably
# because one observation has no sample standard deviation - confirm.
bct_tda_pca_5.40.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_lr.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# ROPE plot (disabled)
#plot(rope(diff_tda_pca_5.40.5_lr.n5_test)))
# Bayes factor (disabled)
#bf_tda_pca_5.40.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_lr.n5_test)) #bf_tda_pca_5.40.5_lr.n5_test
# t-test (disabled)
#t.test(as.matrix(diff_tda_pca_5.40.5_lr.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Logistic regression (caret method "glm" with a binomial family) fitted on
# the node-1 data set; resampling is configured by `fitControl` and Accuracy
# is the reported metric.
Adult_TDA_KDE_5.40.5_n1_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit: sample count, resampling scheme, accuracy and kappa
Adult_TDA_KDE_5.40.5_n1_LrFit0
## Generalized Linear Model
##
## 11838 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7893, 7891, 7892
## Resampling results:
##
## Accuracy Kappa
## 0.8201559 0.4342154
# Per-fold accuracy and kappa from the resampling
Adult_TDA_KDE_5.40.5_n1_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8590621 0.61882270 Fold1
## 2 0.8530530 0.60139865 Fold2
## 3 0.7483528 0.08242486 Fold3
# Keep only the first `resample` column (per-fold Accuracy) for later use
ad_tda_kde_5.40.5_n1_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n1_LrFit0[["resample"]][1]
# Coefficient table and deviance summary of the fitted model
summary(Adult_TDA_KDE_5.40.5_n1_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (10 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.113e+13 9.119e+12 1.220 0.222368
## V1 1.088e-02 2.619e-03 4.154 3.26e-05 ***
## V2.. -1.113e+13 9.119e+12 -1.220 0.222368
## V2.Federal.gov -1.113e+13 9.119e+12 -1.220 0.222368
## V2.Local.gov -1.113e+13 9.119e+12 -1.220 0.222368
## V2.Never.worked -1.113e+13 9.119e+12 -1.220 0.222368
## V2.Private -1.113e+13 9.119e+12 -1.220 0.222368
## V2.Self.emp.inc -1.113e+13 9.119e+12 -1.220 0.222368
## V2.Self.emp.not.inc -1.113e+13 9.119e+12 -1.220 0.222368
## V2.State.gov -1.113e+13 9.119e+12 -1.220 0.222368
## V2.Without.pay -1.113e+13 9.119e+12 -1.220 0.222368
## V3 5.868e-07 2.255e-07 2.603 0.009251 **
## V4.10th -1.194e+00 1.755e-01 -6.804 1.01e-11 ***
## V4.11th -1.072e+00 1.827e-01 -5.865 4.48e-09 ***
## V4.12th -7.273e-01 3.287e-01 -2.212 0.026936 *
## V4.1st.4th -1.808e+00 4.882e-01 -3.702 0.000214 ***
## V4.5th.6th -1.547e+00 3.114e-01 -4.968 6.77e-07 ***
## V4.7th.8th -1.637e+00 2.000e-01 -8.186 2.70e-16 ***
## V4.9th -1.355e+00 2.321e-01 -5.837 5.33e-09 ***
## V4.Assoc.acdm 3.674e-02 1.795e-01 0.205 0.837846
## V4.Assoc.voc 1.062e-01 1.747e-01 0.608 0.543032
## V4.Bachelors 5.414e-01 1.086e-01 4.986 6.18e-07 ***
## V4.Doctorate 1.779e+00 1.825e-01 9.747 < 2e-16 ***
## V4.HS.grad -4.552e-01 1.090e-01 -4.177 2.95e-05 ***
## V4.Masters 9.859e-01 1.325e-01 7.442 9.94e-14 ***
## V4.Preschool -3.307e+01 8.064e+04 0.000 0.999673
## V4.Prof.school 1.525e+00 1.642e-01 9.288 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -2.655e-01 1.951e-01 -1.361 0.173540
## V6.Married.AF.spouse 1.235e+00 1.090e+00 1.133 0.257181
## V6.Married.civ.spouse 1.997e+00 4.655e-01 4.290 1.79e-05 ***
## V6.Married.spouse.absent -6.959e-01 4.141e-01 -1.681 0.092833 .
## V6.Never.married -7.668e-01 2.061e-01 -3.720 0.000199 ***
## V6.Separated -3.263e-01 2.905e-01 -1.123 0.261468
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical -2.936e-01 1.777e-01 -1.653 0.098422 .
## V7.Armed.Forces -3.381e-01 2.167e+00 -0.156 0.876044
## V7.Craft.repair -2.075e-02 1.510e-01 -0.137 0.890703
## V7.Exec.managerial 6.550e-01 1.523e-01 4.302 1.69e-05 ***
## V7.Farming.fishing -1.048e+00 2.244e-01 -4.671 3.00e-06 ***
## V7.Handlers.cleaners -8.334e-01 2.594e-01 -3.212 0.001316 **
## V7.Machine.op.inspct -6.281e-01 2.024e-01 -3.103 0.001918 **
## V7.Other.service -1.054e+00 2.183e-01 -4.831 1.36e-06 ***
## V7.Priv.house.serv -2.366e+01 2.906e+04 -0.001 0.999351
## V7.Prof.specialty 3.298e-01 1.594e-01 2.069 0.038578 *
## V7.Protective.serv 2.757e-02 2.502e-01 0.110 0.912245
## V7.Sales 1.269e-01 1.601e-01 0.793 0.427879
## V7.Tech.support 4.948e-01 2.192e-01 2.257 0.023983 *
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.201e+00 1.684e-01 -7.133 9.82e-13 ***
## V8.Not.in.family -4.274e-01 4.533e-01 -0.943 0.345816
## V8.Other.relative -1.276e+00 4.407e-01 -2.896 0.003783 **
## V8.Own.child -1.707e+00 4.694e-01 -3.638 0.000275 ***
## V8.Unmarried -3.763e-01 4.681e-01 -0.804 0.421439
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.909e-01 3.557e-01 -0.537 0.591570
## V9.Asian.Pac.Islander 3.178e-01 2.844e-01 1.117 0.263852
## V9.Black -1.353e-01 1.240e-01 -1.091 0.275281
## V9.Other 3.902e-01 4.148e-01 0.941 0.346874
## V9.White NA NA NA NA
## V10.Female -9.194e-01 1.213e-01 -7.580 3.46e-14 ***
## V10.Male NA NA NA NA
## V11 3.029e-04 1.647e-05 18.384 < 2e-16 ***
## V12 6.481e-04 5.875e-05 11.031 < 2e-16 ***
## V13 3.012e-02 2.434e-03 12.376 < 2e-16 ***
## V14.. -9.887e-01 1.296e+00 -0.763 0.445383
## V14.Cambodia 4.047e-01 1.748e+00 0.232 0.816894
## V14.Canada -5.794e-01 1.328e+00 -0.436 0.662715
## V14.China -2.116e+00 1.390e+00 -1.523 0.127830
## V14.Columbia -2.259e+00 1.594e+00 -1.417 0.156421
## V14.Cuba -1.496e+00 1.361e+00 -1.099 0.271608
## V14.Dominican.Republic -2.387e+01 4.607e+04 -0.001 0.999587
## V14.Ecuador -2.370e+00 1.996e+00 -1.188 0.234892
## V14.El.Salvador -1.635e+00 1.553e+00 -1.052 0.292628
## V14.England -7.178e-01 1.354e+00 -0.530 0.596036
## V14.France -1.059e+00 1.444e+00 -0.733 0.463480
## V14.Germany -3.792e-01 1.350e+00 -0.281 0.778762
## V14.Greece -1.202e+00 1.531e+00 -0.785 0.432213
## V14.Guatemala -1.167e+00 1.623e+00 -0.719 0.472313
## V14.Haiti -1.381e+00 2.137e+00 -0.646 0.518080
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -1.261e+00 3.252e+00 -0.388 0.698132
## V14.Hong -7.677e-01 1.554e+00 -0.494 0.621374
## V14.Hungary -3.315e-01 1.702e+00 -0.195 0.845548
## V14.India -1.664e+00 1.373e+00 -1.212 0.225415
## V14.Iran -1.686e+00 1.519e+00 -1.110 0.266917
## V14.Ireland 1.462e-01 2.155e+00 0.068 0.945926
## V14.Italy 7.389e-01 1.343e+00 0.550 0.582131
## V14.Jamaica -2.141e+00 1.704e+00 -1.257 0.208906
## V14.Japan 2.198e-01 1.491e+00 0.147 0.882794
## V14.Laos -4.481e-02 1.653e+00 -0.027 0.978372
## V14.Mexico -1.378e+00 1.301e+00 -1.059 0.289775
## V14.Nicaragua -1.518e+00 1.528e+00 -0.993 0.320589
## V14.Outlying.US.Guam.USVI.etc. -2.578e+01 1.448e+05 0.000 0.999858
## V14.Peru -1.132e+00 1.709e+00 -0.662 0.507871
## V14.Philippines -1.388e+00 1.350e+00 -1.028 0.304148
## V14.Poland -6.021e-01 1.440e+00 -0.418 0.675849
## V14.Portugal -2.076e+00 1.788e+00 -1.161 0.245464
## V14.Puerto.Rico -1.049e-01 1.388e+00 -0.076 0.939739
## V14.Scotland 3.049e+00 3.769e+00 0.809 0.418522
## V14.South -2.541e+00 1.470e+00 -1.729 0.083838 .
## V14.Taiwan -4.805e-01 1.475e+00 -0.326 0.744673
## V14.Thailand -1.781e+00 1.972e+00 -0.903 0.366382
## V14.Trinadad.Tobago -8.897e-01 1.784e+00 -0.499 0.617965
## V14.United.States -7.410e-01 1.277e+00 -0.580 0.561645
## V14.Vietnam -1.149e+00 1.545e+00 -0.744 0.456965
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13553.3 on 11837 degrees of freedom
## Residual deviance: 7290.1 on 11739 degrees of freedom
## AIC: 7488.1
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-1 model, showing up to 50 features.
# The title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_KDE_5.40.5_n1_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n1_Lr1Fit TDA-Assisted LR")

# Score the held-out test data with the node-1 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n1_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the test-set predictions against the observed labels
ad_tda_kde_5.40.5_n1_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6965 1007
## >50K 451 1345
##
## Accuracy : 0.8507
## 95% CI : (0.8435, 0.8577)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5559
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9392
## Specificity : 0.5719
## Pos Pred Value : 0.8737
## Neg Pred Value : 0.7489
## Prevalence : 0.7592
## Detection Rate : 0.7130
## Detection Prevalence : 0.8161
## Balanced Accuracy : 0.7555
##
## 'Positive' Class : <=50K
##
# Same object printed a second time; the output repeats the matrix above
ad_tda_kde_5.40.5_n1_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6965 1007
## >50K 451 1345
##
## Accuracy : 0.8507
## 95% CI : (0.8435, 0.8577)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5559
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9392
## Specificity : 0.5719
## Pos Pred Value : 0.8737
## Neg Pred Value : 0.7489
## Prevalence : 0.7592
## Detection Rate : 0.7130
## Detection Prevalence : 0.8161
## Balanced Accuracy : 0.7555
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix (accuracy, kappa, bounds, p-values)
ad_tda_kde_5.40.5_n1_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.507371e-01 5.559070e-01 8.435147e-01 8.577497e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 2.669432e-110 7.274250e-48
# The first entry of `overall` is Accuracy; keep it as the test-set accuracy
ad_tda_kde_5.40.5_n1_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n1_lr_cf0[["overall"]][1]
# Per-class statistics of the same confusion matrix
ad_tda_kde_5.40.5_n1_lr_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9391855 0.5718537 0.8736829
## Neg Pred Value Precision Recall
## 0.7488864 0.8736829 0.9391855
## F1 Prevalence Detection Rate
## 0.9052508 0.7592138 0.7130426
## Detection Prevalence Balanced Accuracy
## 0.8161343 0.7555196
# Entries 5 to 7 of `byClass` are Precision, Recall and F1
ad_tda_kde_5.40.5_n1_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n1_lr_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: `ad_lr_fit_re` minus the node-1 fold accuracies
diff_tda_kde_5.40.5_lr_n1_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n1_lr_fit_re
diff_tda_kde_5.40.5_lr_n1_3_fold
## Accuracy
## 1 -0.012128700
## 2 0.002830176
## 3 0.102114527
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise differences
# (-0.01 and 0.01 are presumably the rope limits -- confirm)
bst_tda_kde_5.40.5_lr.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n1_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; NaN when both are 0, Inf when only probRight is 0)
bst_tda_kde_5.40.5_lr.n1_3_fold_odds.left <-
  bst_tda_kde_5.40.5_lr.n1_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n1_3_fold[["probRight"]]
bst_tda_kde_5.40.5_lr.n1_3_fold_odds.left
## [1] 1
# Bayesian signed-rank test on the fold-wise differences
bsr_tda_kde_5.40.5_lr.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n1_3_fold
## $winLeft
## [1] 0.0739
##
## $winRope
## [1] 0.4478
##
## $winRight
## [1] 0.4783
# Correlated Bayesian t-test on the fold-wise differences.
# NOTE(review): 0.1 is presumably the correlation parameter -- confirm.
bct_tda_kde_5.40.5_lr.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n1_3_fold
## $left
## [1] 0.2134573
##
## $rope
## [1] 0.1181525
##
## $right
## [1] 0.6683902
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_lr_n1_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_lr.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold))
#bf_tda_kde_5.40.5_lr.n1_3_fold
# One-sample t-test of the same differences
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_lr_n1_3_fold)
## t = 0.86303, df = 2, p-value = 0.4791
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.1233070 0.1851843
## sample estimates:
## mean of x
## 0.03093867
### Test set diff
# Difference of two test-set accuracies (a single value, printed below)
diff_tda_kde_5.40.5_lr.n1_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n1_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n1_test
## Accuracy
## 0.002559378
## Bayesian Tests Test set diff
# Bayesian sign test on the test-set difference
# (-0.01 and 0.01 are presumably the rope limits -- confirm)
bst_tda_kde_5.40.5_lr.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; NaN when both are 0, as in the output below)
bst_tda_kde_5.40.5_lr.n1_test_odds.left <-
  bst_tda_kde_5.40.5_lr.n1_test[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n1_test[["probRight"]]
bst_tda_kde_5.40.5_lr.n1_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the test-set difference
bsr_tda_kde_5.40.5_lr.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n1_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the test-set difference.
# The input is a single value and every component of the result prints as NA.
bct_tda_kde_5.40.5_lr.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n1_test)))
#BayesFactor
#bf_tda_kde_5.40.5_lr.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n1_test)) #bf_tda_pca_5.40.5_lr.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n1_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node2
# Logistic regression (caret method "glm" with a binomial family) fitted on
# the node-2 data set; resampling is configured by `fitControl` and Accuracy
# is the reported metric.
Adult_TDA_KDE_5.40.5_n2_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n2.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the caret fit: sample count, resampling scheme, accuracy and kappa
Adult_TDA_KDE_5.40.5_n2_LrFit0
## Generalized Linear Model
##
## 11203 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7469, 7468, 7469
## Resampling results:
##
## Accuracy Kappa
## 0.842722 0.5987201
# Per-fold accuracy and kappa from the resampling
Adult_TDA_KDE_5.40.5_n2_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8478843 0.6098806 Fold1
## 2 0.8278447 0.5648368 Fold2
## 3 0.8524371 0.6214427 Fold3
# Keep only the first `resample` column (per-fold Accuracy) for later use
ad_tda_kde_5.40.5_n2_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n2_LrFit0[["resample"]][1]
# Coefficient table and deviance summary of the fitted model
summary(Adult_TDA_KDE_5.40.5_n2_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (15 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 1.108e+12 3.045e+12 0.364 0.715928
## V1 4.027e-02 3.200e-03 12.586 < 2e-16 ***
## V2.. -1.108e+12 3.045e+12 -0.364 0.715928
## V2.Federal.gov -1.108e+12 3.045e+12 -0.364 0.715928
## V2.Local.gov -1.108e+12 3.045e+12 -0.364 0.715928
## V2.Never.worked -1.108e+12 3.045e+12 -0.364 0.715928
## V2.Private -1.108e+12 3.045e+12 -0.364 0.715928
## V2.Self.emp.inc -1.108e+12 3.045e+12 -0.364 0.715928
## V2.Self.emp.not.inc -1.108e+12 3.045e+12 -0.364 0.715928
## V2.State.gov -1.108e+12 3.045e+12 -0.364 0.715928
## V2.Without.pay -1.108e+12 3.045e+12 -0.364 0.715928
## V3 1.125e-06 3.162e-07 3.558 0.000374 ***
## V4.10th -2.353e+01 3.391e+04 -0.001 0.999446
## V4.11th -7.339e-01 2.154e-01 -3.407 0.000657 ***
## V4.12th -8.115e-01 3.520e-01 -2.306 0.021122 *
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 3.451e-01 1.499e-01 2.301 0.021367 *
## V4.Assoc.voc 1.242e-01 1.531e-01 0.811 0.417150
## V4.Bachelors 9.138e-01 9.759e-02 9.364 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -4.593e-01 9.413e-02 -4.879 1.06e-06 ***
## V4.Masters 1.376e+00 1.190e-01 11.562 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school 2.500e+00 2.257e-01 11.078 < 2e-16 ***
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -1.558e-01 2.409e-01 -0.647 0.517796
## V6.Married.AF.spouse 3.907e+00 1.026e+00 3.806 0.000141 ***
## V6.Married.civ.spouse 2.198e+00 5.607e-01 3.921 8.82e-05 ***
## V6.Married.spouse.absent 1.211e-01 3.877e-01 0.312 0.754776
## V6.Never.married -6.083e-01 2.532e-01 -2.402 0.016285 *
## V6.Separated -3.332e-01 3.308e-01 -1.007 0.313815
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 6.236e-02 1.744e-01 0.358 0.720698
## V7.Armed.Forces -2.400e+01 3.307e+05 0.000 0.999942
## V7.Craft.repair 1.423e-01 1.550e-01 0.918 0.358509
## V7.Exec.managerial 9.258e-01 1.546e-01 5.987 2.13e-09 ***
## V7.Farming.fishing -6.734e-01 2.304e-01 -2.923 0.003466 **
## V7.Handlers.cleaners -4.640e-01 2.816e-01 -1.648 0.099437 .
## V7.Machine.op.inspct -5.604e-01 2.131e-01 -2.630 0.008535 **
## V7.Other.service -1.012e+00 2.282e-01 -4.434 9.27e-06 ***
## V7.Priv.house.serv -3.434e+00 2.578e+00 -1.332 0.182856
## V7.Prof.specialty 4.856e-01 1.627e-01 2.984 0.002845 **
## V7.Protective.serv 6.623e-01 2.319e-01 2.855 0.004300 **
## V7.Sales 2.612e-01 1.609e-01 1.623 0.104617
## V7.Tech.support 5.647e-01 2.079e-01 2.716 0.006607 **
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.451e+00 1.585e-01 -9.153 < 2e-16 ***
## V8.Not.in.family -7.229e-01 5.263e-01 -1.373 0.169607
## V8.Other.relative -1.611e+00 4.430e-01 -3.637 0.000276 ***
## V8.Own.child -1.620e+00 5.343e-01 -3.032 0.002427 **
## V8.Unmarried -7.305e-01 5.377e-01 -1.358 0.174315
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo 2.862e-02 3.225e-01 0.089 0.929271
## V9.Asian.Pac.Islander -9.959e-02 2.457e-01 -0.405 0.685292
## V9.Black -5.255e-02 1.305e-01 -0.403 0.687217
## V9.Other 1.851e-01 4.201e-01 0.441 0.659551
## V9.White NA NA NA NA
## V10.Female -9.044e-01 1.192e-01 -7.590 3.21e-14 ***
## V10.Male NA NA NA NA
## V11 3.290e-04 1.718e-05 19.155 < 2e-16 ***
## V12 8.095e-04 6.614e-05 12.240 < 2e-16 ***
## V13 2.604e-02 2.853e-03 9.128 < 2e-16 ***
## V14.. -5.844e-01 9.204e-01 -0.635 0.525453
## V14.Cambodia 1.853e+00 1.345e+00 1.377 0.168381
## V14.Canada 7.088e-02 9.975e-01 0.071 0.943353
## V14.China -3.302e-01 1.091e+00 -0.303 0.762038
## V14.Columbia -2.514e+01 7.035e+04 0.000 0.999715
## V14.Cuba 1.833e-01 1.046e+00 0.175 0.860916
## V14.Dominican.Republic -2.412e+01 7.824e+04 0.000 0.999754
## V14.Ecuador -1.048e+00 1.648e+00 -0.636 0.524769
## V14.El.Salvador -1.346e-01 1.188e+00 -0.113 0.909734
## V14.England 6.579e-01 9.931e-01 0.662 0.507712
## V14.France 6.376e-01 1.315e+00 0.485 0.627643
## V14.Germany 5.010e-01 9.871e-01 0.508 0.611736
## V14.Greece -1.217e+00 1.223e+00 -0.995 0.319698
## V14.Guatemala -6.987e-01 2.228e+00 -0.314 0.753790
## V14.Haiti -7.312e-01 1.381e+00 -0.530 0.596353
## V14.Holand.Netherlands -2.210e+01 3.347e+05 0.000 0.999947
## V14.Honduras -2.281e+01 1.486e+05 0.000 0.999878
## V14.Hong 1.758e+00 1.381e+00 1.273 0.203080
## V14.Hungary 1.821e+00 1.986e+00 0.917 0.359233
## V14.India -3.107e-01 9.846e-01 -0.316 0.752326
## V14.Iran -4.481e-02 1.248e+00 -0.036 0.971345
## V14.Ireland -2.395e+01 1.527e+05 0.000 0.999875
## V14.Italy 1.075e+00 1.078e+00 0.997 0.318933
## V14.Jamaica -1.634e+00 1.383e+00 -1.182 0.237282
## V14.Japan 9.220e-01 1.119e+00 0.824 0.409816
## V14.Laos -2.462e+01 1.120e+05 0.000 0.999825
## V14.Mexico -4.619e-01 9.951e-01 -0.464 0.642542
## V14.Nicaragua -2.398e+01 8.214e+04 0.000 0.999767
## V14.Outlying.US.Guam.USVI.etc. -2.416e+01 1.881e+05 0.000 0.999898
## V14.Peru -1.533e-01 1.459e+00 -0.105 0.916334
## V14.Philippines 6.989e-01 9.668e-01 0.723 0.469721
## V14.Poland 4.892e-01 1.098e+00 0.445 0.656029
## V14.Portugal -2.413e+01 1.412e+05 0.000 0.999864
## V14.Puerto.Rico -2.242e-01 1.203e+00 -0.186 0.852156
## V14.Scotland 5.750e-02 1.410e+00 0.041 0.967467
## V14.South -1.041e+00 1.098e+00 -0.948 0.343192
## V14.Taiwan 3.489e-01 1.100e+00 0.317 0.751137
## V14.Thailand -8.134e-01 1.512e+00 -0.538 0.590554
## V14.Trinadad.Tobago -2.293e-01 1.788e+00 -0.128 0.897928
## V14.United.States 1.898e-01 8.892e-01 0.213 0.830939
## V14.Vietnam -1.885e+00 1.463e+00 -1.288 0.197606
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 13299.2 on 11202 degrees of freedom
## Residual deviance: 7327.8 on 11109 degrees of freedom
## AIC: 7515.8
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-2 model, showing up to 50 features.
# The title typo "Assited" is corrected to "Assisted".
vip(Adult_TDA_KDE_5.40.5_n2_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n2_Lr1Fit TDA-Assisted LR")

# Score the held-out test data with the node-2 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n2_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the test-set predictions against the observed labels
ad_tda_kde_5.40.5_n2_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6895 994
## >50K 521 1358
##
## Accuracy : 0.8449
## 95% CI : (0.8376, 0.852)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5445
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9297
## Specificity : 0.5774
## Pos Pred Value : 0.8740
## Neg Pred Value : 0.7227
## Prevalence : 0.7592
## Detection Rate : 0.7059
## Detection Prevalence : 0.8076
## Balanced Accuracy : 0.7536
##
## 'Positive' Class : <=50K
##
# Same object printed a second time; the output repeats the matrix above
ad_tda_kde_5.40.5_n2_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6895 994
## >50K 521 1358
##
## Accuracy : 0.8449
## 95% CI : (0.8376, 0.852)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5445
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 0.9297
## Specificity : 0.5774
## Pos Pred Value : 0.8740
## Neg Pred Value : 0.7227
## Prevalence : 0.7592
## Detection Rate : 0.7059
## Detection Prevalence : 0.8076
## Balanced Accuracy : 0.7536
##
## 'Positive' Class : <=50K
##
# Overall statistics of the confusion matrix (accuracy, kappa, bounds, p-values)
ad_tda_kde_5.40.5_n2_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.449017e-01 5.445151e-01 8.375691e-01 8.520281e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 3.448526e-96 7.644200e-34
# The first entry of `overall` is Accuracy; keep it as the test-set accuracy
ad_tda_kde_5.40.5_n2_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n2_lr_cf0[["overall"]][1]
# Per-class statistics of the same confusion matrix
ad_tda_kde_5.40.5_n2_lr_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 0.9297465 0.5773810 0.8740018
## Neg Pred Value Precision Recall
## 0.7227249 0.8740018 0.9297465
## F1 Prevalence Detection Rate
## 0.9010127 0.7592138 0.7058763
## Detection Prevalence Balanced Accuracy
## 0.8076372 0.7535637
# Entries 5 to 7 of `byClass` are Precision, Recall and F1
ad_tda_kde_5.40.5_n2_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n2_lr_cf0[["byClass"]][5:7]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: `ad_lr_fit_re` minus the node-2 fold accuracies
diff_tda_kde_5.40.5_lr_n2_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n2_lr_fit_re
diff_tda_kde_5.40.5_lr_n2_3_fold
## Accuracy
## 1 -0.0009509028
## 2 0.0280384150
## 3 -0.0019697751
## Bayesian Tests 3-fold diff
# Bayesian sign test on the fold-wise differences
# (-0.01 and 0.01 are presumably the rope limits -- confirm)
bst_tda_kde_5.40.5_lr.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; NaN when both are 0, Inf when only probRight is 0)
bst_tda_kde_5.40.5_lr.n2_3_fold_odds.left <-
  bst_tda_kde_5.40.5_lr.n2_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n2_3_fold[["probRight"]]
bst_tda_kde_5.40.5_lr.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test on the fold-wise differences
bsr_tda_kde_5.40.5_lr.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.5722333
##
## $winRight
## [1] 0.4277667
# Correlated Bayesian t-test on the fold-wise differences.
# NOTE(review): 0.1 is presumably the correlation parameter -- confirm.
bct_tda_kde_5.40.5_lr.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n2_3_fold
## $left
## [1] 0.1235925
##
## $rope
## [1] 0.426803
##
## $right
## [1] 0.4496045
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_lr_n2_3_fold, c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_lr.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold))
#bf_tda_kde_5.40.5_lr.n2_3_fold
# One-sample t-test of the same differences
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_lr_n2_3_fold)
## t = 0.8511, df = 2, p-value = 0.4844
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03395398 0.05069913
## sample estimates:
## mean of x
## 0.008372579
### Test set diff
# Difference of two test-set accuracies (a single value, printed below)
diff_tda_kde_5.40.5_lr.n2_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n2_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n2_test
## Accuracy
## 0.008394758
## Bayesian Tests Test set diff
# Bayesian sign test on the test-set difference
# (-0.01 and 0.01 are presumably the rope limits -- confirm)
bst_tda_kde_5.40.5_lr.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the Bayesian sign test
# (probLeft / probRight; NaN when both are 0, as in the output below)
bst_tda_kde_5.40.5_lr.n2_test_odds.left <-
  bst_tda_kde_5.40.5_lr.n2_test[["probLeft"]] /
  bst_tda_kde_5.40.5_lr.n2_test[["probRight"]]
bst_tda_kde_5.40.5_lr.n2_test_odds.left
## [1] NaN
# Bayesian signed-rank test on the test-set difference
bsr_tda_kde_5.40.5_lr.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n2_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Correlated Bayesian t-test on the test-set difference.
# The input is a single value and every component of the result prints as NA.
bct_tda_kde_5.40.5_lr.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n2_test)))
#BayesFactor
#bf_tda_kde_5.40.5_lr.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n2_test)) #bf_tda_pca_5.40.5_lr.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n2_test))
##Node3
# Fit a logistic regression (caret method "glm" with a binomial family) on
# the Node3 data set. adult_df1 is the response and every other column is a
# predictor; the model is scored on Accuracy using the resampling scheme
# held in fitControl.
Adult_TDA_KDE_5.40.5_n3_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n3.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the cross-validated summary of the Node3 fit
Adult_TDA_KDE_5.40.5_n3_LrFit0
## Generalized Linear Model
##
## 10351 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6901, 6900, 6901
## Resampling results:
##
## Accuracy Kappa
## 0.8308375 0.5760205
# Per-fold resampling metrics of the Node3 fit
Adult_TDA_KDE_5.40.5_n3_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8414493 0.6017940 Fold1
## 2 0.8316430 0.5722788 Fold2
## 3 0.8194203 0.5539889 Fold3
# Keep the per-fold Accuracy column (a one-column data frame) of the Node3
# fit for the fold-wise comparison against the baseline.
# FIX: this previously read from Adult_TDA_KDE_5.40.5_n2_LrFit0, so every
# Node3 3-fold test silently reused the Node2 folds.
# NOTE: the Node3 3-fold diff, Bayesian test and t-test outputs recorded
# further down were produced with the Node2 folds; re-knit to refresh them.
ad_tda_kde_5.40.5_n3_lr_fit_re <- Adult_TDA_KDE_5.40.5_n3_LrFit0$resample["Accuracy"]
# Coefficient table of the final glm
summary(Adult_TDA_KDE_5.40.5_n3_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (19 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -5.442e+12 1.082e+13 -0.503 0.614894
## V1 6.072e-02 3.986e-03 15.234 < 2e-16 ***
## V2.. 5.442e+12 1.082e+13 0.503 0.614894
## V2.Federal.gov 5.442e+12 1.082e+13 0.503 0.614894
## V2.Local.gov 5.442e+12 1.082e+13 0.503 0.614894
## V2.Never.worked -4.498e+15 1.082e+13 -415.815 < 2e-16 ***
## V2.Private 5.442e+12 1.082e+13 0.503 0.614894
## V2.Self.emp.inc 5.442e+12 1.082e+13 0.503 0.614894
## V2.Self.emp.not.inc 5.442e+12 1.082e+13 0.503 0.614894
## V2.State.gov 5.442e+12 1.082e+13 0.503 0.614894
## V2.Without.pay 5.442e+12 1.082e+13 0.503 0.614894
## V3 1.137e-06 4.582e-07 2.482 0.013054 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th -9.076e-02 3.513e-01 -0.258 0.796118
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 5.015e-01 1.444e-01 3.472 0.000517 ***
## V4.Assoc.voc 3.809e-01 1.435e-01 2.654 0.007949 **
## V4.Bachelors 1.218e+00 9.825e-02 12.396 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -1.749e-01 9.321e-02 -1.877 0.060585 .
## V4.Masters 1.568e+00 1.493e-01 10.503 < 2e-16 ***
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -5.096e-02 3.358e-01 -0.152 0.879399
## V6.Married.AF.spouse 3.786e+00 1.228e+00 3.083 0.002052 **
## V6.Married.civ.spouse 2.273e+00 5.456e-01 4.166 3.10e-05 ***
## V6.Married.spouse.absent 6.208e-02 4.724e-01 0.131 0.895439
## V6.Never.married -2.785e-01 3.464e-01 -0.804 0.421473
## V6.Separated -1.224e-01 4.115e-01 -0.297 0.766196
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.099e-01 1.716e-01 1.223 0.221165
## V7.Armed.Forces -2.358e+01 2.328e+05 0.000 0.999919
## V7.Craft.repair 1.714e-01 1.522e-01 1.126 0.260112
## V7.Exec.managerial 1.029e+00 1.540e-01 6.683 2.34e-11 ***
## V7.Farming.fishing -8.919e-01 2.579e-01 -3.458 0.000545 ***
## V7.Handlers.cleaners -4.710e-01 2.636e-01 -1.787 0.073959 .
## V7.Machine.op.inspct -1.628e-01 1.876e-01 -0.868 0.385445
## V7.Other.service -6.540e-01 2.227e-01 -2.936 0.003320 **
## V7.Priv.house.serv -2.325e+01 5.408e+04 0.000 0.999657
## V7.Prof.specialty 6.886e-01 1.615e-01 4.263 2.02e-05 ***
## V7.Protective.serv 1.027e+00 2.257e-01 4.550 5.36e-06 ***
## V7.Sales 6.394e-01 1.581e-01 4.045 5.24e-05 ***
## V7.Tech.support 6.904e-01 1.990e-01 3.469 0.000522 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.494e+00 1.705e-01 -8.764 < 2e-16 ***
## V8.Not.in.family -1.007e+00 4.577e-01 -2.199 0.027883 *
## V8.Other.relative -1.873e+00 4.227e-01 -4.431 9.37e-06 ***
## V8.Own.child -2.335e+00 4.404e-01 -5.301 1.15e-07 ***
## V8.Unmarried -1.133e+00 4.719e-01 -2.401 0.016346 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -6.787e-01 4.331e-01 -1.567 0.117108
## V9.Asian.Pac.Islander 1.375e-01 2.310e-01 0.595 0.551541
## V9.Black -1.395e-01 1.281e-01 -1.089 0.276125
## V9.Other -6.095e-01 4.897e-01 -1.245 0.213209
## V9.White NA NA NA NA
## V10.Female -7.951e-01 1.350e-01 -5.890 3.87e-09 ***
## V10.Male NA NA NA NA
## V11 3.207e-04 1.753e-05 18.297 < 2e-16 ***
## V12 6.917e-04 6.675e-05 10.363 < 2e-16 ***
## V13 2.357e-02 3.016e-03 7.817 5.42e-15 ***
## V14.. 2.307e-02 1.223e+00 0.019 0.984951
## V14.Cambodia 2.013e+00 1.490e+00 1.350 0.176891
## V14.Canada 1.081e+00 1.297e+00 0.834 0.404368
## V14.China -6.130e-01 1.384e+00 -0.443 0.657914
## V14.Columbia -2.369e+01 5.866e+04 0.000 0.999678
## V14.Cuba 1.418e+00 1.368e+00 1.036 0.300035
## V14.Dominican.Republic -2.499e-01 1.654e+00 -0.151 0.879931
## V14.Ecuador -6.154e-01 1.754e+00 -0.351 0.725773
## V14.El.Salvador 4.170e-01 1.487e+00 0.280 0.779117
## V14.England 8.603e-01 1.312e+00 0.656 0.511967
## V14.France 1.691e+00 1.519e+00 1.113 0.265873
## V14.Germany 9.204e-01 1.261e+00 0.730 0.465447
## V14.Greece -7.581e-01 1.493e+00 -0.508 0.611498
## V14.Guatemala -2.237e+01 1.215e+05 0.000 0.999853
## V14.Haiti 6.880e-01 1.459e+00 0.471 0.637356
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.327e+01 2.169e+05 0.000 0.999914
## V14.Hong -2.156e+01 2.294e+05 0.000 0.999925
## V14.Hungary -2.462e+01 1.243e+05 0.000 0.999842
## V14.India 1.691e-01 1.293e+00 0.131 0.895951
## V14.Iran 5.009e-01 1.328e+00 0.377 0.706073
## V14.Ireland 1.907e+00 1.544e+00 1.235 0.216681
## V14.Italy 4.168e-01 1.332e+00 0.313 0.754403
## V14.Jamaica 5.246e-01 1.418e+00 0.370 0.711331
## V14.Japan 8.073e-01 1.367e+00 0.590 0.554959
## V14.Laos -2.363e+01 1.420e+05 0.000 0.999867
## V14.Mexico -2.971e-01 1.351e+00 -0.220 0.825969
## V14.Nicaragua -2.110e+01 1.543e+05 0.000 0.999891
## V14.Outlying.US.Guam.USVI.etc. -2.248e+01 1.749e+05 0.000 0.999897
## V14.Peru -6.751e-01 1.795e+00 -0.376 0.706891
## V14.Philippines 1.609e+00 1.268e+00 1.269 0.204332
## V14.Poland 4.808e-01 1.345e+00 0.358 0.720679
## V14.Portugal 9.624e-01 1.529e+00 0.629 0.529083
## V14.Puerto.Rico -6.887e-01 1.380e+00 -0.499 0.617852
## V14.Scotland -5.474e-03 1.801e+00 -0.003 0.997575
## V14.South -5.974e-01 1.368e+00 -0.437 0.662327
## V14.Taiwan -2.731e-02 1.412e+00 -0.019 0.984565
## V14.Thailand -4.045e-01 1.792e+00 -0.226 0.821425
## V14.Trinadad.Tobago 9.461e-02 1.933e+00 0.049 0.960974
## V14.United.States 6.505e-01 1.204e+00 0.540 0.589131
## V14.Vietnam -2.113e+00 1.651e+00 -1.280 0.200546
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 12380.9 on 10350 degrees of freedom
## Residual deviance: 7087.5 on 10261 degrees of freedom
## AIC: 7267.5
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node3 fit (second argument 50 is passed
# positionally to vip). Title typo fixed: "Assited" -> "Assisted".
vip(Adult_TDA_KDE_5.40.5_n3_LrFit0, 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n3_Lr1Fit TDA-Assisted LR")

# Predict the class of every row of the test data with the Node3 fit
pred0 <- predict(Adult_TDA_KDE_5.40.5_n3_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.40.5_n3_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6733 930
## >50K 683 1422
##
## Accuracy : 0.8349
## 95% CI : (0.8274, 0.8422)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.5316
##
## Mcnemar's Test P-Value : 9.059e-10
##
## Sensitivity : 0.9079
## Specificity : 0.6046
## Pos Pred Value : 0.8786
## Neg Pred Value : 0.6755
## Prevalence : 0.7592
## Detection Rate : 0.6893
## Detection Prevalence : 0.7845
## Balanced Accuracy : 0.7562
##
## 'Positive' Class : <=50K
##
# The confusion matrix is printed once, right after it is created; the
# redundant second print that used to sit here has been dropped.
# Overall test-set metrics of the Node3 model
ad_tda_kde_5.40.5_n3_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.348690e-01 5.315527e-01 8.273549e-01 8.421827e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 1.759957e-74 9.058863e-10
# Overall accuracy, selected by name (it is element 1 of $overall)
ad_tda_kde_5.40.5_n3_lr_cf0_ov_acc <- ad_tda_kde_5.40.5_n3_lr_cf0$overall["Accuracy"]
# Per-class metrics of the Node3 model
ad_tda_kde_5.40.5_n3_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.9079018 0.6045918 0.8786376
## Neg Pred Value Precision Recall
## 0.6755344 0.8786376 0.9079018
## F1 Prevalence Detection Rate
## 0.8930300 0.7592138 0.6892916
## Detection Prevalence Balanced Accuracy
## 0.7845004 0.7562468
# Precision, Recall and F1, selected by name (elements 5:7 of $byClass)
ad_tda_kde_5.40.5_n3_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_lr_fit_re minus the Node3 folds.
# NOTE(review): every value recorded in this section is identical to the
# Node2 results - check that ad_tda_kde_5.40.5_n3_lr_fit_re was really built
# from the Node3 fit.
diff_tda_kde_5.40.5_lr_n3_3_fold <- ad_lr_fit_re - ad_tda_kde_5.40.5_n3_lr_fit_re
diff_tda_kde_5.40.5_lr_n3_3_fold
## Accuracy
## 1 -0.0009509028
## 2 0.0280384150
## 3 -0.0019697751
## Bayesian Tests 3-fold diff
# Bayesian Sign Test with ROPE bounds -0.01 / 0.01
bst_tda_kde_5.40.5_lr.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n3_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0.25
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.40.5_lr.n3_3_fold_odds.left <-
  bst_tda_kde_5.40.5_lr.n3_3_fold$probLeft / bst_tda_kde_5.40.5_lr.n3_3_fold$probRight
bst_tda_kde_5.40.5_lr.n3_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test with the same ROPE bounds
bsr_tda_kde_5.40.5_lr.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n3_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.5806333
##
## $winRight
## [1] 0.4193667
# Bayesian Correlated Test (second argument 0.1, then the ROPE bounds)
bct_tda_kde_5.40.5_lr.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n3_3_fold
## $left
## [1] 0.1235925
##
## $rope
## [1] 0.426803
##
## $right
## [1] 0.4496045
# ROPE plot of the fold differences over the interval [-0.01, 0.01]
plot(rope(diff_tda_kde_5.40.5_lr_n3_3_fold, c(-0.01, 0.01)))

# BayesFactor (disabled)
#bf_tda_kde_5.40.5_lr.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold))
#bf_tda_kde_5.40.5_lr.n3_3_fold
# Classical one-sample t-test on the fold differences
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_lr_n3_3_fold)
## t = 0.8511, df = 2, p-value = 0.4844
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.03395398 0.05069913
## sample estimates:
## mean of x
## 0.008372579
### Test set diff
# Difference in test-set accuracy: lr_cf_ov_acc minus the Node3 TDA/KDE LR
# accuracy. This is a single value, not a vector of folds.
diff_tda_kde_5.40.5_lr.n3_test <- lr_cf_ov_acc - ad_tda_kde_5.40.5_n3_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n3_test
## Accuracy
## 0.01842752
## Bayesian Tests Test set diff
# Bayesian Sign Test with ROPE bounds -0.01 / 0.01
bst_tda_kde_5.40.5_lr.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.40.5_lr.n3_test_odds.left <-
  bst_tda_kde_5.40.5_lr.n3_test$probLeft / bst_tda_kde_5.40.5_lr.n3_test$probRight
bst_tda_kde_5.40.5_lr.n3_test_odds.left
## [1] 0
# Bayesian Signed Rank Test with the same ROPE bounds.
# FIX: this call used to be run on diff_tda_kde_5.40.5_lr.n2_test and stored
# in bsr_tda_kde_5.40.5_lr.n2_test, which overwrote the Node2 result and left
# the Node3 result undefined. It now uses the Node3 difference and name.
# NOTE: re-knit to regenerate the output of this call; the values recorded
# before the fix (winLeft 0, winRope 1, winRight 0) belonged to the Node2
# difference and have been removed.
bsr_tda_kde_5.40.5_lr.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n3_test
# Bayesian Correlated Test.
# The recorded result is NA for all three regions; the input here is a
# single difference value.
bct_tda_kde_5.40.5_lr.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.40.5_lr.n3_test)))
# BayesFactor (disabled)
#bf_tda_kde_5.40.5_lr.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n3_test)) #bf_tda_pca_5.40.5_lr.n3_test
# t_test (disabled)
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n3_test))
##Node4
# Fit a logistic regression (caret method "glm" with a binomial family) on
# the Node4 data set. adult_df1 is the response and every other column is a
# predictor; the model is scored on Accuracy using the resampling scheme
# held in fitControl.
Adult_TDA_KDE_5.40.5_n4_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n4.vec,
  family = "binomial",
  method = "glm",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
# Print the cross-validated summary of the Node4 fit
Adult_TDA_KDE_5.40.5_n4_LrFit0
## Generalized Linear Model
##
## 8741 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 5828, 5827, 5827
## Resampling results:
##
## Accuracy Kappa
## 0.8612281 0.5431938
# Per-fold resampling metrics of the Node4 fit
Adult_TDA_KDE_5.40.5_n4_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8558187 0.5280364 Fold1
## 2 0.8651338 0.5527739 Fold2
## 3 0.8627316 0.5487711 Fold3
# Keep the per-fold Accuracy column (column 1 of $resample, as a one-column
# data frame) for the fold-wise comparison against the baseline.
ad_tda_kde_5.40.5_n4_lr_fit_re <- Adult_TDA_KDE_5.40.5_n4_LrFit0$resample["Accuracy"]
# Coefficient table of the final glm
summary(Adult_TDA_KDE_5.40.5_n4_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (19 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.134e+13 3.330e+13 6.410e-01 0.521537
## V1 7.427e-02 5.668e-03 1.310e+01 < 2e-16 ***
## V2.. -1.687e+13 2.973e+13 -5.670e-01 0.570403
## V2.Federal.gov -1.687e+13 2.972e+13 -5.680e-01 0.570294
## V2.Local.gov -1.687e+13 2.973e+13 -5.680e-01 0.570339
## V2.Never.worked -4.520e+15 2.974e+13 -1.520e+02 < 2e-16 ***
## V2.Private -1.687e+13 2.972e+13 -5.680e-01 0.570204
## V2.Self.emp.inc -1.687e+13 2.973e+13 -5.680e-01 0.570324
## V2.Self.emp.not.inc -1.687e+13 2.972e+13 -5.680e-01 0.570201
## V2.State.gov -1.687e+13 2.972e+13 -5.680e-01 0.570244
## V2.Without.pay -4.520e+15 2.972e+13 -1.521e+02 < 2e-16 ***
## V3 1.573e-06 6.993e-07 2.250e+00 0.024469 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th -3.042e-01 4.582e-01 -6.640e-01 0.506712
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm 7.289e-01 2.256e-01 3.231e+00 0.001234 **
## V4.Assoc.voc 4.727e-01 1.401e-01 3.375e+00 0.000739 ***
## V4.Bachelors 1.143e+00 1.251e-01 9.141e+00 < 2e-16 ***
## V4.Doctorate NA NA NA NA
## V4.HS.grad -1.622e-01 9.156e-02 -1.771e+00 0.076507 .
## V4.Masters NA NA NA NA
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -4.254e-01 5.627e-01 -7.560e-01 0.449703
## V6.Married.AF.spouse 2.095e+00 1.162e+00 1.803e+00 0.071403 .
## V6.Married.civ.spouse 2.102e+00 7.620e-01 2.758e+00 0.005810 **
## V6.Married.spouse.absent 1.707e-01 7.473e-01 2.280e-01 0.819276
## V6.Never.married -5.590e-01 5.737e-01 -9.740e-01 0.329899
## V6.Separated -5.369e-01 6.451e-01 -8.320e-01 0.405223
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.327e-01 1.894e-01 1.229e+00 0.219108
## V7.Armed.Forces -4.504e+15 3.001e+07 -1.501e+08 < 2e-16 ***
## V7.Craft.repair 2.033e-01 1.621e-01 1.254e+00 0.209691
## V7.Exec.managerial 9.821e-01 1.715e-01 5.725e+00 1.03e-08 ***
## V7.Farming.fishing -7.879e-01 3.005e-01 -2.622e+00 0.008747 **
## V7.Handlers.cleaners -6.266e-01 2.813e-01 -2.228e+00 0.025907 *
## V7.Machine.op.inspct -2.835e-02 1.952e-01 -1.450e-01 0.884499
## V7.Other.service -4.776e-01 2.329e-01 -2.051e+00 0.040294 *
## V7.Priv.house.serv -1.469e+01 5.911e+02 -2.500e-02 0.980176
## V7.Prof.specialty 8.882e-01 1.945e-01 4.566e+00 4.98e-06 ***
## V7.Protective.serv 1.042e+00 2.587e-01 4.027e+00 5.64e-05 ***
## V7.Sales 5.888e-01 1.749e-01 3.366e+00 0.000762 ***
## V7.Tech.support 8.556e-01 2.249e-01 3.805e+00 0.000142 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.572e+00 2.361e-01 -6.658e+00 2.78e-11 ***
## V8.Not.in.family -1.003e+00 5.625e-01 -1.784e+00 0.074417 .
## V8.Other.relative -2.437e+00 5.794e-01 -4.206e+00 2.60e-05 ***
## V8.Own.child -2.175e+00 5.216e-01 -4.170e+00 3.05e-05 ***
## V8.Unmarried -1.376e+00 5.854e-01 -2.351e+00 0.018729 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.179e+00 5.163e-01 -2.284e+00 0.022401 *
## V9.Asian.Pac.Islander 4.072e-01 3.108e-01 1.310e+00 0.190105
## V9.Black -3.458e-01 1.624e-01 -2.129e+00 0.033244 *
## V9.Other -1.315e+00 6.762e-01 -1.945e+00 0.051799 .
## V9.White NA NA NA NA
## V10.Female -8.700e-01 2.084e-01 -4.174e+00 2.99e-05 ***
## V10.Male NA NA NA NA
## V11 3.328e-04 2.235e-05 1.489e+01 < 2e-16 ***
## V12 5.415e-04 7.757e-05 6.981e+00 2.94e-12 ***
## V13 2.948e-02 3.695e-03 7.978e+00 1.49e-15 ***
## V14.. -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Cambodia -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Canada -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.China -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Columbia -4.508e+15 2.096e+13 -2.151e+02 < 2e-16 ***
## V14.Cuba -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Dominican.Republic -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Ecuador -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.El.Salvador -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.England -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.France -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Germany -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Greece -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Guatemala -4.508e+15 2.096e+13 -2.151e+02 < 2e-16 ***
## V14.Haiti -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -4.508e+15 2.096e+13 -2.151e+02 < 2e-16 ***
## V14.Hong -4.508e+15 2.096e+13 -2.151e+02 < 2e-16 ***
## V14.Hungary -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.India -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Iran -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Ireland -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Italy -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Jamaica -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Japan -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Laos -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Mexico -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Nicaragua -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Outlying.US.Guam.USVI.etc. -4.508e+15 2.096e+13 -2.151e+02 < 2e-16 ***
## V14.Peru -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Philippines -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Poland -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Portugal -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Puerto.Rico -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Scotland -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.South -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Taiwan -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Thailand -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Trinadad.Tobago -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.United.States -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Vietnam -4.470e+12 2.096e+13 -2.130e-01 0.831085
## V14.Yugoslavia -4.470e+12 2.096e+13 -2.130e-01 0.831085
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 8821.8 on 8740 degrees of freedom
## Residual deviance: 4879.6 on 8651 degrees of freedom
## AIC: 5059.6
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the Node4 fit (second argument 50 is passed
# positionally to vip). Title typo fixed: "Assited" -> "Assisted".
vip(Adult_TDA_KDE_5.40.5_n4_LrFit0, 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n4_Lr1Fit TDA-Assisted LR")

# Predict the class of every row of the test data with the Node4 fit
pred0 <- predict(Adult_TDA_KDE_5.40.5_n4_LrFit0, newdata = adult.one_hot_df4Test)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the predictions against the observed test labels
ad_tda_kde_5.40.5_n4_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6568 846
## >50K 848 1506
##
## Accuracy : 0.8266
## 95% CI : (0.8189, 0.834)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.5258
##
## Mcnemar's Test P-Value : 0.9806
##
## Sensitivity : 0.8857
## Specificity : 0.6403
## Pos Pred Value : 0.8859
## Neg Pred Value : 0.6398
## Prevalence : 0.7592
## Detection Rate : 0.6724
## Detection Prevalence : 0.7590
## Balanced Accuracy : 0.7630
##
## 'Positive' Class : <=50K
##
# The confusion matrix is printed once, right after it is created; the
# redundant second print that used to sit here has been dropped.
# Overall test-set metrics of the Node4 model
ad_tda_kde_5.40.5_n4_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.265766e-01 5.258061e-01 8.189201e-01 8.340378e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 7.052021e-59 9.806161e-01
# Overall accuracy, selected by name (it is element 1 of $overall)
ad_tda_kde_5.40.5_n4_lr_cf0_ov_acc <- ad_tda_kde_5.40.5_n4_lr_cf0$overall["Accuracy"]
# Per-class metrics of the Node4 model
ad_tda_kde_5.40.5_n4_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8856526 0.6403061 0.8858916
## Neg Pred Value Precision Recall
## 0.6397621 0.8858916 0.8856526
## F1 Prevalence Detection Rate
## 0.8857721 0.7592138 0.6723997
## Detection Prevalence Balanced Accuracy
## 0.7590090 0.7629794
# Precision, Recall and F1, selected by name (elements 5:7 of $byClass)
ad_tda_kde_5.40.5_n4_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted LR vs. tda-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy difference: ad_lr_fit_re minus the Node4 folds
diff_tda_kde_5.40.5_lr_n4_3_fold <- ad_lr_fit_re - ad_tda_kde_5.40.5_n4_lr_fit_re
diff_tda_kde_5.40.5_lr_n4_3_fold
## Accuracy
## 1 -0.00888534
## 2 -0.00925071
## 3 -0.01226435
## Bayesian Tests 3-fold diff
# Bayesian Sign Test with ROPE bounds -0.01 / 0.01
bst_tda_kde_5.40.5_lr.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n4_3_fold
## $probLeft
## [1] 0.25
##
## $probRope
## [1] 0.75
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test: probLeft / probRight.
# probRight is 0 in the recorded run, so the ratio is Inf.
bst_tda_kde_5.40.5_lr.n4_3_fold_odds.left <-
  bst_tda_kde_5.40.5_lr.n4_3_fold$probLeft / bst_tda_kde_5.40.5_lr.n4_3_fold$probRight
bst_tda_kde_5.40.5_lr.n4_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test with the same ROPE bounds
bsr_tda_kde_5.40.5_lr.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n4_3_fold
## $winLeft
## [1] 0.3226667
##
## $winRope
## [1] 0.6773333
##
## $winRight
## [1] 0
# Bayesian Correlated Test (second argument 0.1, then the ROPE bounds)
bct_tda_kde_5.40.5_lr.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n4_3_fold
## $left
## [1] 0.5380583
##
## $rope
## [1] 0.4600671
##
## $right
## [1] 0.001874644
# ROPE plot of the fold differences over the interval [-0.01, 0.01]
plot(rope(diff_tda_kde_5.40.5_lr_n4_3_fold, c(-0.01, 0.01)))

# BayesFactor (disabled)
#bf_tda_kde_5.40.5_lr.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold))
#bf_tda_kde_5.40.5_lr.n4_3_fold
# Classical one-sample t-test on the fold differences
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_lr_n4_3_fold)
## t = -9.4648, df = 2, p-value = 0.01098
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.014740101 -0.005526832
## sample estimates:
## mean of x
## -0.01013347
### Test set diff
# Difference in test-set accuracy: lr_cf_ov_acc minus the Node4 TDA/KDE LR
# accuracy. This is a single value, not a vector of folds.
diff_tda_kde_5.40.5_lr.n4_test <- lr_cf_ov_acc - ad_tda_kde_5.40.5_n4_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n4_test
## Accuracy
## 0.0267199
## Bayesian Tests Test set diff
# Bayesian Sign Test with ROPE bounds -0.01 / 0.01
bst_tda_kde_5.40.5_lr.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test: probLeft / probRight
bst_tda_kde_5.40.5_lr.n4_test_odds.left <-
  bst_tda_kde_5.40.5_lr.n4_test$probLeft / bst_tda_kde_5.40.5_lr.n4_test$probRight
bst_tda_kde_5.40.5_lr.n4_test_odds.left
## [1] 0
# Bayesian Signed Rank Test with the same ROPE bounds
bsr_tda_kde_5.40.5_lr.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1597333
##
## $winRight
## [1] 0.8402667
# Bayesian Correlated Test.
# The recorded result is NA for all three regions; the input here is a
# single difference value.
bct_tda_kde_5.40.5_lr.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot (disabled)
#plot(rope(diff_tda_kde_5.40.5_lr.n4_test)))
# BayesFactor (disabled)
#bf_tda_kde_5.40.5_lr.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n4_test)) #bf_tda_pca_5.40.5_lr.n4_test
# t_test (disabled)
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n4_test))
##Node5
# Node-5 model (per the `##Node5` header): binomial GLM, i.e. logistic
# regression, fitted through caret on the node-5 training subset,
# resampled as configured in `fitControl` and scored on accuracy.
# NOTE(review): the run logged below reports non-convergence, fitted
# probabilities of 0/1 and a rank-deficient fit, so read the coefficient
# table with care.
Adult_TDA_KDE_5.40.5_n5_LrFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n5.vec,
  method = "glm",
  family = "binomial",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
Adult_TDA_KDE_5.40.5_n5_LrFit0
## Generalized Linear Model
##
## 6628 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 4419, 4418, 4419
## Resampling results:
##
## Accuracy Kappa
## 0.8636074 0.2775096
Adult_TDA_KDE_5.40.5_n5_LrFit0$resample
## Accuracy Kappa Resample
## 1 0.8623812 0.2931751 Fold1
## 2 0.8737557 0.4151387 Fold2
## 3 0.8546854 0.1242149 Fold3
# Keep the per-fold accuracies as a one-column data frame; they are
# differenced against `ad_lr_fit_re` further down.
ad_tda_kde_5.40.5_n5_lr_fit_re <-
  Adult_TDA_KDE_5.40.5_n5_LrFit0$resample["Accuracy"]
# Summary of the fitted model (coefficient table in the log below).
summary(Adult_TDA_KDE_5.40.5_n5_LrFit0)
##
## Call:
## NULL
##
## Coefficients: (23 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.737e+13 3.923e+13 -0.443 0.657889
## V1 8.029e-02 7.893e-03 10.173 < 2e-16 ***
## V2.. 1.737e+13 3.923e+13 0.443 0.657889
## V2.Federal.gov 1.737e+13 3.923e+13 0.443 0.657889
## V2.Local.gov 1.737e+13 3.923e+13 0.443 0.657889
## V2.Never.worked -4.486e+15 3.923e+13 -114.370 < 2e-16 ***
## V2.Private 1.737e+13 3.923e+13 0.443 0.657889
## V2.Self.emp.inc 1.737e+13 3.923e+13 0.443 0.657889
## V2.Self.emp.not.inc 1.737e+13 3.923e+13 0.443 0.657889
## V2.State.gov 1.737e+13 3.923e+13 0.443 0.657889
## V2.Without.pay 1.737e+13 3.923e+13 0.443 0.657889
## V3 2.658e-06 1.072e-06 2.479 0.013192 *
## V4.10th NA NA NA NA
## V4.11th NA NA NA NA
## V4.12th NA NA NA NA
## V4.1st.4th NA NA NA NA
## V4.5th.6th NA NA NA NA
## V4.7th.8th NA NA NA NA
## V4.9th NA NA NA NA
## V4.Assoc.acdm NA NA NA NA
## V4.Assoc.voc 4.525e-01 2.170e-01 2.085 0.037046 *
## V4.Bachelors NA NA NA NA
## V4.Doctorate NA NA NA NA
## V4.HS.grad -3.360e-01 9.127e-02 -3.681 0.000232 ***
## V4.Masters NA NA NA NA
## V4.Preschool NA NA NA NA
## V4.Prof.school NA NA NA NA
## V4.Some.college NA NA NA NA
## V5 NA NA NA NA
## V6.Divorced -5.043e-01 1.080e+00 -0.467 0.640579
## V6.Married.AF.spouse 4.044e+00 1.723e+00 2.346 0.018957 *
## V6.Married.civ.spouse 1.844e+00 1.181e+00 1.562 0.118387
## V6.Married.spouse.absent -5.915e-01 1.326e+00 -0.446 0.655531
## V6.Never.married -9.496e-01 1.081e+00 -0.878 0.379787
## V6.Separated -8.007e-01 1.153e+00 -0.694 0.487494
## V6.Widowed NA NA NA NA
## V7.. NA NA NA NA
## V7.Adm.clerical 2.966e-01 2.268e-01 1.308 0.190857
## V7.Armed.Forces -2.304e+01 1.579e+05 0.000 0.999884
## V7.Craft.repair 2.749e-01 1.769e-01 1.554 0.120233
## V7.Exec.managerial 8.084e-01 1.961e-01 4.123 3.73e-05 ***
## V7.Farming.fishing -3.908e-01 3.405e-01 -1.148 0.251029
## V7.Handlers.cleaners -3.787e-01 2.768e-01 -1.368 0.171286
## V7.Machine.op.inspct 5.938e-02 2.111e-01 0.281 0.778506
## V7.Other.service -3.837e-01 2.667e-01 -1.438 0.150302
## V7.Priv.house.serv -2.345e+01 7.742e+04 0.000 0.999758
## V7.Prof.specialty 1.171e+00 2.472e-01 4.737 2.17e-06 ***
## V7.Protective.serv 9.087e-01 2.903e-01 3.130 0.001748 **
## V7.Sales 4.306e-01 1.970e-01 2.186 0.028815 *
## V7.Tech.support 1.063e+00 2.781e-01 3.824 0.000131 ***
## V7.Transport.moving NA NA NA NA
## V8.Husband -1.094e+00 3.263e-01 -3.354 0.000797 ***
## V8.Not.in.family -5.420e-01 6.132e-01 -0.884 0.376805
## V8.Other.relative -1.391e+00 6.401e-01 -2.173 0.029784 *
## V8.Own.child -1.515e+00 5.780e-01 -2.620 0.008791 **
## V8.Unmarried -1.360e+00 6.819e-01 -1.994 0.046160 *
## V8.Wife NA NA NA NA
## V9.Amer.Indian.Eskimo -1.217e+00 5.881e-01 -2.069 0.038543 *
## V9.Asian.Pac.Islander 7.260e-02 5.204e-01 0.140 0.889046
## V9.Black -4.920e-01 2.153e-01 -2.285 0.022314 *
## V9.Other -1.844e+00 1.070e+00 -1.723 0.084895 .
## V9.White NA NA NA NA
## V10.Female -6.073e-01 2.794e-01 -2.174 0.029702 *
## V10.Male NA NA NA NA
## V11 3.723e-04 2.826e-05 13.173 < 2e-16 ***
## V12 5.691e-04 9.226e-05 6.169 6.89e-10 ***
## V13 2.781e-02 4.368e-03 6.367 1.93e-10 ***
## V14.. -2.797e+00 1.456e+00 -1.921 0.054746 .
## V14.Cambodia -2.696e+01 1.388e+05 0.000 0.999845
## V14.Canada -2.908e+00 1.819e+00 -1.598 0.109997
## V14.China -2.394e+00 1.744e+00 -1.372 0.169975
## V14.Columbia -2.725e+01 8.472e+04 0.000 0.999743
## V14.Cuba -1.738e+00 1.640e+00 -1.059 0.289423
## V14.Dominican.Republic -1.902e+01 1.535e+03 -0.012 0.990112
## V14.Ecuador -1.200e+00 1.736e+00 -0.691 0.489309
## V14.El.Salvador -2.763e+01 7.608e+04 0.000 0.999710
## V14.England -2.983e+00 1.757e+00 -1.698 0.089458 .
## V14.France -2.497e+01 1.947e+05 0.000 0.999898
## V14.Germany -2.378e+00 1.509e+00 -1.576 0.115093
## V14.Greece -3.194e+00 2.119e+00 -1.507 0.131748
## V14.Guatemala -1.895e+00 1.821e+00 -1.041 0.298014
## V14.Haiti -2.470e+01 1.224e+05 0.000 0.999839
## V14.Holand.Netherlands NA NA NA NA
## V14.Honduras -2.627e+01 2.368e+05 0.000 0.999912
## V14.Hong -2.789e+01 1.462e+05 0.000 0.999848
## V14.Hungary -2.736e+01 3.817e+05 0.000 0.999943
## V14.India -2.663e+01 9.540e+04 0.000 0.999777
## V14.Iran -1.641e+00 1.985e+00 -0.827 0.408386
## V14.Ireland -2.755e+00 1.799e+00 -1.532 0.125566
## V14.Italy -4.157e+00 1.889e+00 -2.200 0.027780 *
## V14.Jamaica -1.689e+00 1.629e+00 -1.037 0.299929
## V14.Japan -2.745e+01 8.283e+04 0.000 0.999736
## V14.Laos -2.759e+01 1.612e+05 0.000 0.999863
## V14.Mexico -3.184e+00 1.508e+00 -2.112 0.034703 *
## V14.Nicaragua -2.646e+01 1.047e+05 0.000 0.999798
## V14.Outlying.US.Guam.USVI.etc. -2.611e+01 1.678e+05 0.000 0.999876
## V14.Peru -2.723e+01 1.231e+05 0.000 0.999823
## V14.Philippines -1.799e+00 1.626e+00 -1.106 0.268518
## V14.Poland -3.008e+00 1.805e+00 -1.667 0.095575 .
## V14.Portugal -2.232e+00 1.871e+00 -1.193 0.232859
## V14.Puerto.Rico -3.876e+00 1.790e+00 -2.166 0.030336 *
## V14.Scotland -2.850e+01 2.340e+05 0.000 0.999903
## V14.South -3.116e+00 1.943e+00 -1.604 0.108675
## V14.Taiwan -3.571e+00 1.969e+00 -1.814 0.069729 .
## V14.Thailand -9.201e-01 3.534e+00 -0.260 0.794571
## V14.Trinadad.Tobago -2.830e+01 2.341e+05 0.000 0.999904
## V14.United.States -2.581e+00 1.398e+00 -1.846 0.064923 .
## V14.Vietnam -2.804e+00 1.895e+00 -1.480 0.138916
## V14.Yugoslavia NA NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 5623.5 on 6627 degrees of freedom
## Residual deviance: 3522.3 on 6542 degrees of freedom
## AIC: 3694.3
##
## Number of Fisher Scoring iterations: 25
# Variable-importance plot for the node-5 logistic regression, limited to
# 50 features. The plot title previously misspelled "Assisted" as
# "Assited"; only that spelling is corrected here.
vip(Adult_TDA_KDE_5.40.5_n5_LrFit0, num_features = 50) +
  ggtitle("Adult_TDA_KDE_5.40.5_n5_Lr1Fit TDA-Assisted LR")

# Score the test data with the node-5 logistic regression.
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_LrFit0,
  newdata = adult.one_hot_df4Test
)
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from rank-deficient fit; attr(*, "non-estim") has doubtful cases
# Confusion matrix of the node-5 predictions against the test labels.
ad_tda_kde_5.40.5_n5_lr_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6465 866
## >50K 951 1486
##
## Accuracy : 0.814
## 95% CI : (0.8061, 0.8217)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.4974
##
## Mcnemar's Test P-Value : 0.04877
##
## Sensitivity : 0.8718
## Specificity : 0.6318
## Pos Pred Value : 0.8819
## Neg Pred Value : 0.6098
## Prevalence : 0.7592
## Detection Rate : 0.6619
## Detection Prevalence : 0.7505
## Balanced Accuracy : 0.7518
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n5_lr_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 6465 866
## >50K 951 1486
##
## Accuracy : 0.814
## 95% CI : (0.8061, 0.8217)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : < 2e-16
##
## Kappa : 0.4974
##
## Mcnemar's Test P-Value : 0.04877
##
## Sensitivity : 0.8718
## Specificity : 0.6318
## Pos Pred Value : 0.8819
## Neg Pred Value : 0.6098
## Prevalence : 0.7592
## Detection Rate : 0.6619
## Detection Prevalence : 0.7505
## Balanced Accuracy : 0.7518
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n5_lr_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.139844e-01 4.974288e-01 8.061234e-01 8.216578e-01 7.592138e-01
## AccuracyPValue McnemarPValue
## 4.867829e-39 4.876791e-02
# Overall test-set accuracy, picked by name (a single named value).
ad_tda_kde_5.40.5_n5_lr_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_lr_cf0$overall["Accuracy"]
# Per-class statistics of the same confusion matrix.
ad_tda_kde_5.40.5_n5_lr_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.8717638 0.6318027 0.8818715
## Neg Pred Value Precision Recall
## 0.6097661 0.8818715 0.8717638
## F1 Prevalence Detection Rate
## 0.8767885 0.7592138 0.6618550
## Detection Prevalence Balanced Accuracy
## 0.7505119 0.7517832
# Precision, recall and F1 (entries 5 to 7 of byClass), selected by name.
ad_tda_kde_5.40.5_n5_lr_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_lr_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted LR vs. TDA-assisted LR classifiers
### 3-fold diff
# Fold-wise accuracy gap: `ad_lr_fit_re` (presumably the non-TDA LR
# folds; defined earlier in the file) minus the node-5 TDA-assisted LR
# folds. Negative entries mean the node-5 model scored higher.
diff_tda_kde_5.40.5_lr_n5_3_fold <-
  ad_lr_fit_re - ad_tda_kde_5.40.5_n5_lr_fit_re
diff_tda_kde_5.40.5_lr_n5_3_fold
## Accuracy
## 1 -0.015447764
## 2 -0.017872529
## 3 -0.004218088
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Bayesian sign test on the fold differences; -0.01 and 0.01 are the same
# bounds passed to the ROPE plot of these differences.
bst_tda_kde_5.40.5_lr.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n5_3_fold
## $probLeft
## [1] 0.5
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test; evaluates to Inf
# when probRight is exactly 0, as in the logged run.
bst_tda_kde_5.40.5_lr.n5_3_fold_odds.left <- with(
  bst_tda_kde_5.40.5_lr.n5_3_fold,
  probLeft / probRight
)
bst_tda_kde_5.40.5_lr.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Bayesian signed-rank test on the same fold differences and bounds.
bsr_tda_kde_5.40.5_lr.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n5_3_fold
## $winLeft
## [1] 0.5117667
##
## $winRope
## [1] 0.4882333
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the fold differences.
# NOTE(review): 0.1 is presumably the assumed correlation between folds;
# confirm against the helper's definition. -0.01 / 0.01 are the bounds
# also used for the ROPE plot.
bct_tda_kde_5.40.5_lr.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n5_3_fold
## $left
## [1] 0.6717891
##
## $rope
## [1] 0.3064492
##
## $right
## [1] 0.02176175
# Rope Plot
# ROPE plot of the fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_kde_5.40.5_lr_n5_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_kde_5.40.5_lr.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold))
#bf_tda_kde_5.40.5_lr.n5_3_fold
# One-sample t-test of the fold differences (default null: mean 0).
t.test(as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_lr_n5_3_fold)
## t = -2.975, df = 2, p-value = 0.09685
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.030609780 0.005584192
## sample estimates:
## mean of x
## -0.01251279
### Test set diff
# Test-set accuracy gap: `lr_cf_ov_acc` (defined earlier in the file)
# minus the node-5 TDA-assisted LR accuracy. This is a single value.
diff_tda_kde_5.40.5_lr.n5_test <-
  lr_cf_ov_acc - ad_tda_kde_5.40.5_n5_lr_cf0_ov_acc
diff_tda_kde_5.40.5_lr.n5_test
## Accuracy
## 0.03931204
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Bayesian sign test on the test-set difference (bounds -0.01 / 0.01).
# NOTE(review): the input here is one single difference.
bst_tda_kde_5.40.5_lr.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_lr.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the test-set sign test.
bst_tda_kde_5.40.5_lr.n5_test_odds.left <- with(
  bst_tda_kde_5.40.5_lr.n5_test,
  probLeft / probRight
)
bst_tda_kde_5.40.5_lr.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_kde_5.40.5_lr.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_lr.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_lr.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1582333
##
## $winRight
## [1] 0.8417667
# Bayesian Correlated Test
# Correlated Bayesian t-test on the test-set difference.
# NOTE(review): with one single difference as input the logged result is
# NA for left, rope and right; presumably the test needs more than one
# observation. Confirm against the helper's definition.
bct_tda_kde_5.40.5_lr.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_lr.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_lr.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_lr.n5_test))
#BayesFactor
#bf_tda_kde_5.40.5_lr.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_lr.n5_test))
#bf_tda_kde_5.40.5_lr.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_lr.n5_test))
#naiveBayes
# Naive Bayes through caret on the one-hot training set, resampled as
# configured in `fitControl` and tuned on accuracy.
# NOTE(review): the log below shows the usekernel = FALSE candidate
# failing in every fold (zero variance within a class for several
# predictors), so only usekernel = TRUE is actually evaluated. Consider
# dropping those columns or restricting the tuning grid.
adultNbFit <- train(
  as.factor(adult_df1) ~ .,
  data = adult.one_hot_df4Train,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V14.Holand.Netherlands, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Holand.Netherlands, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Thailand
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc.
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
adultNbFit
## Naive Bayes
##
## 22793 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 15196, 15195, 15195
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7665954 0.04554121
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
adultNbFit$resample
## Accuracy Kappa Resample
## 1 0.7746479 0.094018742 Fold1
## 2 0.7654646 0.039289981 Fold2
## 3 0.7596736 0.003314921 Fold3
# Per-fold accuracies of the naive Bayes fit, as a one-column data frame.
ad_nb_fit_re <- adultNbFit$resample["Accuracy"]
# Structure summary of the fitted model object.
summary(adultNbFit)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
#varImp (adultNbFit)
# Score the test data with the naive Bayes model.
predictions <- predict(
  adultNbFit,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of those predictions against the test labels.
nb_cf <- confusionMatrix(
  data = predictions,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
nb_cf
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2219
## >50K 0 133
##
## Accuracy : 0.7728
## 95% CI : (0.7644, 0.7811)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.0008047
##
## Kappa : 0.0834
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.05655
## Pos Pred Value : 0.76969
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98638
## Balanced Accuracy : 0.52827
##
## 'Positive' Class : <=50K
##
nb_cf$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.772829648 0.083417992 0.764388511 0.781107809 0.759213759
## AccuracyPValue McnemarPValue
## 0.000804745 0.000000000
# Overall test-set accuracy, picked by name (a single named value).
nb_cf_ov_acc <- nb_cf$overall["Accuracy"]
# Per-class statistics of the same confusion matrix.
nb_cf$byClass
## Sensitivity Specificity Pos Pred Value
## 1.00000000 0.05654762 0.76969382
## Neg Pred Value Precision Recall
## 1.00000000 0.76969382 1.00000000
## F1 Prevalence Detection Rate
## 0.86986101 0.75921376 0.75921376
## Detection Prevalence Balanced Accuracy
## 0.98638411 0.52827381
# Precision, recall and F1 (entries 5 to 7 of byClass), selected by name.
nb_cf_pre_rec_f1 <- nb_cf$byClass[c("Precision", "Recall", "F1")]
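## With TDA PCA filter: 5 intervals, 50% overlap, 5 bins
## NOTE(review): the object names below use "5.40.5" -- confirm whether the overlap is 40% or 50%.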
##Node1
# Node-1 model (per the `##Node1` header): naive Bayes through caret on
# the node-1 training subset, resampled as configured in `fitControl` and
# tuned on accuracy.
# NOTE(review): in the logged run the usekernel = FALSE candidate fails
# in every fold, Kappa is 0 in every fold, and the test-set predictions
# further down all fall in one class. Check the class balance of this
# node before relying on the comparison.
Adult_TDA_PC_5.40.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n1.vec,
  method = "nb",
  metric = "Accuracy",
  trControl = fitControl
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.HS.grad, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Black, V9.Other, V9.White, V10.Female, V10.Male, V14.., V14.Cambodia, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.HS.grad, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Never.worked, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.HS.grad, V4.Preschool, V4.Some.college, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Never.married, V6.Separated, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Craft.repair, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Protective.serv, V7.Sales, V7.Tech.support, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V9.Amer.Indian.Eskimo, V9.Black, V9.Other, V10.Female, V10.Male, V14.., V14.Cambodia, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.40.5_n1_NbFit0
## Naive Bayes
##
## 3373 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 2249, 2249, 2248
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9922918 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.40.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9919929 0 Fold1
## 2 0.9928826 0 Fold2
## 3 0.9920000 0 Fold3
# Per-fold accuracies of the node-1 fit, as a one-column data frame; they
# are differenced against `ad_nb_fit_re` further down.
ad_tda_pc_5.40.5_n1_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n1_NbFit0$resample["Accuracy"]
# Structure summary of the fitted model object.
summary(Adult_TDA_PC_5.40.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node-1 naive Bayes model.
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n1_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of those predictions against the test labels.
ad_tda_pc_5.40.5_n1_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Overall test-set accuracy of the node-1 naive Bayes model, picked by
# name (a single named value).
ad_tda_pc_5.40.5_n1_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n1_nb_cf0$overall["Accuracy"]
# Per-class statistics. This line used to read `$byClas1`, a misspelled
# element name that silently evaluated to NULL instead of the table.
ad_tda_pc_5.40.5_n1_nb_cf0$byClass
# Precision, recall and F1, selected by name rather than by position so
# the extraction does not depend on the ordering of byClass.
ad_tda_pc_5.40.5_n1_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n1_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-TDA-assisted NB vs. TDA-assisted NB classifiers
### 3-fold diff
# Fold-wise accuracy gap: full-training-set naive Bayes folds minus the
# node-1 TDA-assisted folds. Negative entries mean the node-1 model
# scored higher.
diff_tda_pca_5.40.5_nb_n1_3_fold <-
  ad_nb_fit_re - ad_tda_pc_5.40.5_n1_nb_fit_re
diff_tda_pca_5.40.5_nb_n1_3_fold
## Accuracy
## 1 -0.2173450
## 2 -0.2274180
## 3 -0.2323264
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
# Bayesian sign test on the fold differences; -0.01 and 0.01 are the same
# bounds passed to the ROPE plot of these differences.
bst_tda_pca_5.40.5_nb.n1_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n1_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the sign test; evaluates to Inf
# when probRight is exactly 0, as in the logged run.
bst_tda_pca_5.40.5_nb.n1_3_fold_odds.left <- with(
  bst_tda_pca_5.40.5_nb.n1_3_fold,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nb.n1_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Bayesian signed-rank test on the same fold differences and bounds.
bsr_tda_pca_5.40.5_nb.n1_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n1_3_fold
## $winLeft
## [1] 0.9905333
##
## $winRope
## [1] 0.009466667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Correlated Bayesian t-test on the fold differences.
# NOTE(review): 0.1 is presumably the assumed correlation between folds;
# confirm against the helper's definition. -0.01 / 0.01 are the bounds
# also used for the ROPE plot.
bct_tda_pca_5.40.5_nb.n1_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n1_3_fold
## $left
## [1] 0.9997216
##
## $rope
## [1] 4.520956e-05
##
## $right
## [1] 0.0002331816
# Rope Plot
# ROPE plot of the fold differences over the interval [-0.01, 0.01].
plot(
  rope(diff_tda_pca_5.40.5_nb_n1_3_fold, range = c(-0.01, 0.01))
)

# Bayes factor (disabled)
#bf_tda_pca_5.40.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold))
#bf_tda_pca_5.40.5_nb.n1_3_fold
# One-sample t-test of the fold differences (default null: mean 0).
t.test(as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nb_n1_3_fold)
## t = -51.183, df = 2, p-value = 0.0003815
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2446694 -0.2067235
## sample estimates:
## mean of x
## -0.2256965
### Test set diff
# Test-set accuracy gap: full-training-set naive Bayes accuracy minus the
# node-1 TDA-assisted accuracy. This is a single value.
diff_tda_pca_5.40.5_nb.n1_test <-
  nb_cf_ov_acc - ad_tda_pc_5.40.5_n1_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n1_test
## Accuracy
## 0.5320434
## Bayesian Tests Test set diff
# Bayesian Sign Test
# Bayesian sign test on the test-set difference (bounds -0.01 / 0.01).
# NOTE(review): the input here is one single difference.
bst_tda_pca_5.40.5_nb.n1_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n1_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
# Ratio of probLeft to probRight from the test-set sign test.
bst_tda_pca_5.40.5_nb.n1_test_odds.left <- with(
  bst_tda_pca_5.40.5_nb.n1_test,
  probLeft / probRight
)
bst_tda_pca_5.40.5_nb.n1_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
# Bayesian signed-rank test on the single test-set difference.
bsr_tda_pca_5.40.5_nb.n1_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n1_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1615333
##
## $winRight
## [1] 0.8384667
# Bayesian Correlated Test
# Correlated Bayesian t-test on the test-set difference.
# NOTE(review): with one single difference as input the logged result is
# NA for left, rope and right; presumably the test needs more than one
# observation. Confirm against the helper's definition.
bct_tda_pca_5.40.5_nb.n1_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n1_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nb.n1_test))
#BayesFactor
#bf_tda_pca_5.40.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n1_test))
#bf_tda_pca_5.40.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n1_test))
##Node2
# Fit a naive Bayes classifier ('nb') on tda.m_adult_5.40.5.n2.vec with the
# shared fitControl resampling settings, selecting on accuracy.
Adult_TDA_PC_5.40.5_n2_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n2.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Own.child, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Columbia, V14.Dominican.Republic, V14.Ecuador, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the resampling summary. The usekernel = FALSE row is NaN because that
# setting failed to fit in every fold (zero-variance predictors, see the
# warnings above), so usekernel = TRUE is the only candidate with a score.
Adult_TDA_PC_5.40.5_n2_NbFit0
## Naive Bayes
##
## 10276 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6851, 6850, 6851
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.6108408 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold accuracy and kappa for the selected tuning values
Adult_TDA_PC_5.40.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.6108029 0 Fold1
## 2 0.6109165 0 Fold2
## 3 0.6108029 0 Fold3
# Keep the fold accuracies (first column) as a one-column data frame
ad_tda_pc_5.40.5_n2_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n2_NbFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_PC_5.40.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node 2 naive Bayes fit
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n2_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of those predictions against the observed test labels
ad_tda_pc_5.40.5_n2_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object has not
# changed since it was created and printed, so the output below is a duplicate.
ad_tda_pc_5.40.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the confusion matrix
ad_tda_pc_5.40.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
# Keep the accuracy entry (first element) for the test-set comparison
ad_tda_pc_5.40.5_n2_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n2_nb_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.40.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.0000000 1.0000000 NaN
## Neg Pred Value Precision Recall
## 0.2407862 NA 0.0000000
## F1 Prevalence Detection Rate
## NA 0.7592138 0.0000000
## Detection Prevalence Balanced Accuracy
## 0.0000000 0.5000000
# Keep precision, recall and F1 (elements 5 to 7); precision and F1 are NA
# here because no test row was predicted as the positive class.
ad_tda_pc_5.40.5_n2_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n2_nb_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Fold-wise accuracy gap: ad_nb_fit_re minus the node 2 fold accuracies.
# Positive values mean the node 2 model scored lower on that fold.
diff_tda_pca_5.40.5_nb_n2_3_fold <- ad_nb_fit_re - ad_tda_pc_5.40.5_n2_nb_fit_re
diff_tda_pca_5.40.5_nb_n2_3_fold
## Accuracy
## 1 0.1638450
## 2 0.1545481
## 3 0.1488707
## Bayesian Tests 3-fold diff
# Bayesian sign test with bounds -0.01 / 0.01
bst_tda_pca_5.40.5_nb.n2_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Ratio of the sign test's left probability to its right probability
bst_tda_pca_5.40.5_nb.n2_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nb.n2_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n2_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian signed-rank test, same bounds
bsr_tda_pca_5.40.5_nb.n2_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008766667
##
## $winRight
## [1] 0.9912333
# Bayesian correlated t-test: 0.1, then the same bounds
# (0.1 is presumably the correlation setting of the helper -- TODO confirm).
bct_tda_pca_5.40.5_nb.n2_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n2_3_fold
## $left
## [1] 0.0004615979
##
## $rope
## [1] 0.0001351271
##
## $right
## [1] 0.9994033
# ROPE plot of the fold-wise differences, same +/-0.01 bounds
plot(
  rope(diff_tda_pca_5.40.5_nb_n2_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold))
#bf_tda_pca_5.40.5_nb.n2_3_fold
# One-sample t-test on the same fold-wise differences
t.test(as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nb_n2_3_fold)
## t = 35.686, df = 2, p-value = 0.0007843
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1369752 0.1745339
## sample estimates:
## mean of x
## 0.1557546
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc minus the node 2 model's accuracy.
# NOTE(review): this is a single number, so every test below runs on one
# observation; the correlated t-test prints NA for all three probabilities.
diff_tda_pca_5.40.5_nb.n2_test <- nb_cf_ov_acc - ad_tda_pc_5.40.5_n2_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n2_test
## Accuracy
## 0.5320434
## Bayesian Tests Test set diff
# Bayesian sign test with bounds -0.01 / 0.01
bst_tda_pca_5.40.5_nb.n2_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n2_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability
bst_tda_pca_5.40.5_nb.n2_test_odds.left <-
  bst_tda_pca_5.40.5_nb.n2_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n2_test[["probRight"]]
bst_tda_pca_5.40.5_nb.n2_test_odds.left
## [1] 0
# Bayesian signed-rank test, same bounds
bsr_tda_pca_5.40.5_nb.n2_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n2_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1591
##
## $winRight
## [1] 0.8409
# Bayesian correlated t-test (0.1, then the same bounds)
bct_tda_pca_5.40.5_nb.n2_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n2_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nb.n2_test)))
#BayesFactor
#bf_tda_pca_5.40.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n2_test)) #bf_tda_pca_5.40.5_nb.n2_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n2_test))
##Node3
# Fit a naive Bayes classifier ('nb') on tda.m_adult_5.40.5.n3.vec with the
# shared fitControl resampling settings, selecting on accuracy.
Adult_TDA_PC_5.40.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.El.Salvador, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Portugal
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the resampling summary. The usekernel = FALSE row is NaN because that
# setting failed to fit in every fold (zero-variance predictors, see the
# warnings above), so usekernel = TRUE is the only candidate with a score.
Adult_TDA_PC_5.40.5_n3_NbFit0
## Naive Bayes
##
## 11563 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7709, 7708, 7709
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7917496 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold accuracy and kappa for the selected tuning values
Adult_TDA_PC_5.40.5_n3_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7916450 0 Fold1
## 2 0.7916991 0 Fold2
## 3 0.7919045 0 Fold3
# Keep the fold accuracies (first column) as a one-column data frame
ad_tda_pc_5.40.5_n3_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n3_NbFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_PC_5.40.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node 3 naive Bayes fit
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of those predictions against the observed test labels
ad_tda_pc_5.40.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2332
## >50K 0 20
##
## Accuracy : 0.7613
## 95% CI : (0.7527, 0.7697)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3228
##
## Kappa : 0.0129
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.008503
## Pos Pred Value : 0.760771
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.997952
## Balanced Accuracy : 0.504252
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object has not
# changed since it was created and printed, so the output below is a duplicate.
ad_tda_pc_5.40.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2332
## >50K 0 20
##
## Accuracy : 0.7613
## 95% CI : (0.7527, 0.7697)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3228
##
## Kappa : 0.0129
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.008503
## Pos Pred Value : 0.760771
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.997952
## Balanced Accuracy : 0.504252
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the confusion matrix
ad_tda_pc_5.40.5_n3_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76126126 0.01285513 0.75267868 0.76968780 0.75921376
## AccuracyPValue McnemarPValue
## 0.32281324 0.00000000
# Keep the accuracy entry (first element) for the test-set comparison
ad_tda_pc_5.40.5_n3_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n3_nb_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.40.5_n3_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.000000000 0.008503401 0.760771440
## Neg Pred Value Precision Recall
## 1.000000000 0.760771440 1.000000000
## F1 Prevalence Detection Rate
## 0.864134234 0.759213759 0.759213759
## Detection Prevalence Balanced Accuracy
## 0.997952498 0.504251701
# Keep precision, recall and F1 (elements 5 to 7)
ad_tda_pc_5.40.5_n3_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n3_nb_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Fold-wise accuracy gap: ad_nb_fit_re minus the node 3 fold accuracies.
# Negative values mean the node 3 model scored higher on that fold.
diff_tda_pca_5.40.5_nb_n3_3_fold <- ad_nb_fit_re - ad_tda_pc_5.40.5_n3_nb_fit_re
diff_tda_pca_5.40.5_nb_n3_3_fold
## Accuracy
## 1 -0.01699716
## 2 -0.02623450
## 3 -0.03223092
## Bayesian Tests 3-fold diff
# Bayesian sign test with bounds -0.01 / 0.01
bst_tda_pca_5.40.5_nb.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Ratio of the sign test's left probability to its right probability;
# it is Inf here because the right probability is 0.
bst_tda_pca_5.40.5_nb.n3_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nb.n3_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n3_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test, same bounds
bsr_tda_pca_5.40.5_nb.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n3_3_fold
## $winLeft
## [1] 0.9633667
##
## $winRope
## [1] 0.03663333
##
## $winRight
## [1] 0
# Bayesian correlated t-test: 0.1, then the same bounds
# (0.1 is presumably the correlation setting of the helper -- TODO confirm).
bct_tda_pca_5.40.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n3_3_fold
## $left
## [1] 0.951211
##
## $rope
## [1] 0.03852412
##
## $right
## [1] 0.01026489
# ROPE plot of the fold-wise differences, same +/-0.01 bounds
plot(
  rope(diff_tda_pca_5.40.5_nb_n3_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold))
#bf_tda_pca_5.40.5_nb.n3_3_fold
# One-sample t-test on the same fold-wise differences
t.test(as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nb_n3_3_fold)
## t = -5.6773, df = 2, p-value = 0.02965
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.044217767 -0.006090613
## sample estimates:
## mean of x
## -0.02515419
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc minus the node 3 model's accuracy.
# NOTE(review): this is a single number, so every test below runs on one
# observation; the correlated t-test prints NA for all three probabilities.
diff_tda_pca_5.40.5_nb.n3_test <- nb_cf_ov_acc - ad_tda_pc_5.40.5_n3_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n3_test
## Accuracy
## 0.01156839
## Bayesian Tests Test set diff
# Bayesian sign test with bounds -0.01 / 0.01
bst_tda_pca_5.40.5_nb.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n3_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability
bst_tda_pca_5.40.5_nb.n3_test_odds.left <-
  bst_tda_pca_5.40.5_nb.n3_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n3_test[["probRight"]]
bst_tda_pca_5.40.5_nb.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test, same bounds.
# Fixed copy-paste error: this step used to re-run the test on the node 2
# difference and assign it to bsr_tda_pca_5.40.5_nb.n2_test, which overwrote
# the node 2 result and never produced a node 3 result.
bsr_tda_pca_5.40.5_nb.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n3_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n3_test
## (output pending re-run: the values recorded here before the fix --
## winLeft 0, winRope 0.1580333, winRight 0.8419667 -- were computed from the
## node 2 difference, not node 3)
# Bayesian correlated t-test (0.1, then the same bounds)
bct_tda_pca_5.40.5_nb.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nb.n3_test)))
#BayesFactor
#bf_tda_pca_5.40.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n3_test)) #bf_tda_pca_5.40.5_nb.n3_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n3_test))
##Node4
# Fit a naive Bayes classifier ('nb') on tda.m_adult_5.40.5.n4.vec with the
# shared fitControl resampling settings, selecting on accuracy.
Adult_TDA_PC_5.40.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Ireland, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Ecuador, V14.El.Salvador, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.India, V14.Iran, V14.Italy, V14.Jamaica, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Print the resampling summary. The usekernel = FALSE row is NaN because that
# setting failed to fit in every fold (zero-variance predictors, see the
# warnings above), so usekernel = TRUE is the only candidate with a score.
Adult_TDA_PC_5.40.5_n4_NbFit0
## Naive Bayes
##
## 14818 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9878, 9879, 9879
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9574167 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold accuracy and kappa for the selected tuning values
Adult_TDA_PC_5.40.5_n4_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9572874 0 Fold1
## 2 0.9574813 0 Fold2
## 3 0.9574813 0 Fold3
# Keep the fold accuracies (first column) as a one-column data frame
ad_tda_pc_5.40.5_n4_nb_fit_re <-
  Adult_TDA_PC_5.40.5_n4_NbFit0[["resample"]]["Accuracy"]
summary(Adult_TDA_PC_5.40.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the test data with the node 4 naive Bayes fit
pred0 <- predict(
  Adult_TDA_PC_5.40.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of those predictions against the observed test labels
ad_tda_pc_5.40.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# NOTE(review): second print of the same confusion matrix; the object has not
# changed since it was created and printed, so the output below is a duplicate.
ad_tda_pc_5.40.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall test-set statistics from the confusion matrix
ad_tda_pc_5.40.5_n4_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Keep the accuracy entry (first element) for the test-set comparison
ad_tda_pc_5.40.5_n4_nb_cf0_ov_acc <-
  ad_tda_pc_5.40.5_n4_nb_cf0[["overall"]]["Accuracy"]
ad_tda_pc_5.40.5_n4_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
# Keep precision, recall and F1 (elements 5 to 7)
ad_tda_pc_5.40.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_pc_5.40.5_n4_nb_cf0[["byClass"]][c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Fold-wise accuracy gap: ad_nb_fit_re minus the node 4 fold accuracies.
# Negative values mean the node 4 model scored higher on that fold.
diff_tda_pca_5.40.5_nb_n4_3_fold <- ad_nb_fit_re - ad_tda_pc_5.40.5_n4_nb_fit_re
diff_tda_pca_5.40.5_nb_n4_3_fold
## Accuracy
## 1 -0.1826396
## 2 -0.1920167
## 3 -0.1978077
## Bayesian Tests 3-fold diff
# Bayesian sign test with bounds -0.01 / 0.01
bst_tda_pca_5.40.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Ratio of the sign test's left probability to its right probability;
# it is Inf here because the right probability is 0.
bst_tda_pca_5.40.5_nb.n4_3_fold_odds.left <-
  bst_tda_pca_5.40.5_nb.n4_3_fold[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n4_3_fold[["probRight"]]
bst_tda_pca_5.40.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test, same bounds
bsr_tda_pca_5.40.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n4_3_fold
## $winLeft
## [1] 0.9914333
##
## $winRope
## [1] 0.008566667
##
## $winRight
## [1] 0
# Bayesian correlated t-test: 0.1, then the same bounds
# (0.1 is presumably the correlation setting of the helper -- TODO confirm).
bct_tda_pca_5.40.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n4_3_fold
## $left
## [1] 0.9996023
##
## $rope
## [1] 7.520336e-05
##
## $right
## [1] 0.0003225284
# ROPE plot of the fold-wise differences, same +/-0.01 bounds
plot(
  rope(diff_tda_pca_5.40.5_nb_n4_3_fold, c(-0.01, 0.01))
)

# BayesFactor (disabled)
#bf_tda_pca_5.40.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold))
#bf_tda_pca_5.40.5_nb.n4_3_fold
# One-sample t-test on the same fold-wise differences
t.test(as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nb_n4_3_fold)
## t = -43.179, df = 2, p-value = 0.0005359
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2098358 -0.1718068
## sample estimates:
## mean of x
## -0.1908213
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc minus the node 4 model's accuracy.
# NOTE(review): this is a single number, so every test below runs on one
# observation; the correlated t-test prints NA for all three probabilities.
diff_tda_pca_5.40.5_nb.n4_test <- nb_cf_ov_acc - ad_tda_pc_5.40.5_n4_nb_cf0_ov_acc
diff_tda_pca_5.40.5_nb.n4_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian sign test with bounds -0.01 / 0.01
bst_tda_pca_5.40.5_nb.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_pca_5.40.5_nb.n4_test),
  -0.01,
  0.01
)
bst_tda_pca_5.40.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Ratio of the sign test's left probability to its right probability
bst_tda_pca_5.40.5_nb.n4_test_odds.left <-
  bst_tda_pca_5.40.5_nb.n4_test[["probLeft"]] /
  bst_tda_pca_5.40.5_nb.n4_test[["probRight"]]
bst_tda_pca_5.40.5_nb.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test, same bounds
bsr_tda_pca_5.40.5_nb.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_pca_5.40.5_nb.n4_test),
  -0.01,
  0.01
)
bsr_tda_pca_5.40.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4648
##
## $winRight
## [1] 0.5352
# Bayesian correlated t-test (0.1, then the same bounds)
bct_tda_pca_5.40.5_nb.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_pca_5.40.5_nb.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_pca_5.40.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nb.n4_test)))
#BayesFactor
#bf_tda_pca_5.40.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n4_test)) #bf_tda_pca_5.40.5_nb.n4_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n4_test))
##Node5
# Fit a naive Bayes model ('nb') on the Node5 data with the shared fitControl
# resampling setup, choosing the tuning values by Accuracy.
# In the recorded run every usekernel=FALSE fit fails with a zero-variance
# error, so only the usekernel=TRUE row has resampled results.
Adult_TDA_PC_5.40.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.HS.grad, V4.Masters, V4.Preschool, V4.Prof.school, V4.Some.college, V5, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Black, V9.Other, V9.White, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.United.States, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Own.child, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_PC_5.40.5_n5_NbFit0
## Naive Bayes
##
## 12081 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8053, 8054, 8055
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9996689 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_PC_5.40.5_n5_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9995035 0 Fold1
## 2 0.9997517 0 Fold2
## 3 0.9997516 0 Fold3
# Keep the per-fold Accuracy column (the first column of $resample) as a
# one-column data frame for the fold-wise comparison.
ad_tda_pc_5.40.5_n5_nb_fit_re <- Adult_TDA_PC_5.40.5_n5_NbFit0[["resample"]]["Accuracy"]
# Print a structural summary of the fitted model
summary(Adult_TDA_PC_5.40.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test data with the Node5 naive Bayes model
pred0 <- predict(Adult_TDA_PC_5.40.5_n5_NbFit0, newdata = adult.one_hot_df4Test)
# Cross-tabulate the predictions against the observed test labels
ad_tda_pc_5.40.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_pc_5.40.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_pc_5.40.5_n5_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
# Store the overall test accuracy (the first, "Accuracy", element of $overall)
ad_tda_pc_5.40.5_n5_nb_cf0_ov_acc <- ad_tda_pc_5.40.5_n5_nb_cf0[["overall"]]["Accuracy"]
# Print the per-class statistics
ad_tda_pc_5.40.5_n5_nb_cf0[["byClass"]]
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_pc_5.40.5_n5_nb_cf0_pre_rec_f1<-ad_tda_pc_5.40.5_n5_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes (NB) vs. tda-assisted naive Bayes (NB) classifiers
### 3-fold diff
diff_tda_pca_5.40.5_nb_n5_3_fold<-(ad_nb_fit_re - ad_tda_pc_5.40.5_n5_nb_fit_re)
diff_tda_pca_5.40.5_nb_n5_3_fold
## Accuracy
## 1 -0.2248556
## 2 -0.2342871
## 3 -0.2400780
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_pca_5.40.5_nb.n5_3_fold<-BayesianSignTest(as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold),-0.01,0.01)
bst_tda_pca_5.40.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_pca_5.40.5_nb.n5_3_fold_odds.left<-bst_tda_pca_5.40.5_nb.n5_3_fold$probLeft/bst_tda_pca_5.40.5_nb.n5_3_fold$probRight
bst_tda_pca_5.40.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian Signed Rank Test
# Runs on the Node5 fold-wise differences with bounds -0.01 and 0.01.
# The call previously passed the Node4 differences
# (diff_tda_pca_5.40.5_nb_n4_3_fold) by copy-paste mistake, so the recorded
# output that follows reflects Node4 and must be regenerated.
bsr_tda_pca_5.40.5_nb.n5_3_fold<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold),-0.01,0.01)
bsr_tda_pca_5.40.5_nb.n5_3_fold
## $winLeft
## [1] 0.9916333
##
## $winRope
## [1] 0.008366667
##
## $winRight
## [1] 0
# Bayesian Correlated Test
bct_tda_pca_5.40.5_nb.n5_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_nb.n5_3_fold
## $left
## [1] 0.9997366
##
## $rope
## [1] 4.153807e-05
##
## $right
## [1] 0.0002218875
# Rope Plot
plot(rope(diff_tda_pca_5.40.5_nb_n5_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_pca_5.40.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold))
#bf_tda_pca_5.40.5_nb.n5_3_fold
#t_test
t.test(as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_pca_5.40.5_nb_n5_3_fold)
## t = -52.541, df = 2, p-value = 0.000362
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2521603 -0.2139869
## sample estimates:
## mean of x
## -0.2330736
### Test set diff
diff_tda_pca_5.40.5_nb.n5_test<-(nb_cf_ov_acc - ad_tda_pc_5.40.5_n5_nb_cf0_ov_acc)
diff_tda_pca_5.40.5_nb.n5_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_pca_5.40.5_nb.n5_test<-BayesianSignTest(as.matrix(diff_tda_pca_5.40.5_nb.n5_test),-0.01,0.01)
bst_tda_pca_5.40.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_pca_5.40.5_nb.n5_test_odds.left<-bst_tda_pca_5.40.5_nb.n5_test$probLeft/bst_tda_pca_5.40.5_nb.n5_test$probRight
bst_tda_pca_5.40.5_nb.n5_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_pca_5.40.5_nb.n5_test<-BayesianSignedRank(as.matrix(diff_tda_pca_5.40.5_nb.n5_test),-0.01,0.01)
bsr_tda_pca_5.40.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4554667
##
## $winRight
## [1] 0.5445333
# Bayesian Correlated Test
# Arguments after the data: 0.1 (presumably the correlation/rho argument —
# confirm against the function definition), then -0.01 and 0.01, the same
# bounds passed to the sign and signed-rank tests.
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left/rope/right — presumably the test needs more than one
# observation; confirm whether this call should be kept.
bct_tda_pca_5.40.5_nb.n5_test<-correlatedBayesianTtest(as.matrix(diff_tda_pca_5.40.5_nb.n5_test),0.1,-0.01,0.01)
bct_tda_pca_5.40.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_pca_5.40.5_nb.n5_test)))
#BayesFactor
#bf_tda_pca_5.40.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_pca_5.40.5_nb.n5_test)) #bf_tda_pca_5.40.5_nb.n5_test
#t_test
#t.test(as.matrix(diff_tda_pca_5.40.5_nb.n5_test))
##With TDA KDE filter 5 intervals, 50% overlap, 5 bins
##Node1
# Fit a naive Bayes model ('nb') on the KDE-filter Node1 data with the shared
# fitControl resampling setup, choosing the tuning values by Accuracy.
# In the recorded run every usekernel=FALSE fit fails with a zero-variance
# error, so only the usekernel=TRUE row has resampled results.
Adult_TDA_KDE_5.40.5_n1_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_kde_adult_5.40.5.n1.vec,
  method = 'nb',
  trControl = fitControl,
  metric = 'Accuracy'
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Ireland, V14.Outlying.US.Guam.USVI.etc., V14.Thailand, V14.Vietnam
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V7.Armed.Forces, V7.Priv.house.serv, V14.Dominican.Republic, V14.Haiti, V14.Holand.Netherlands, V14.Outlying.US.Guam.USVI.etc.
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Priv.house.serv, V14.Cambodia, V14.Dominican.Republic, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal, V14.Scotland, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.40.5_n1_NbFit0
## Naive Bayes
##
## 11838 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7892, 7892, 7892
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7522386 0.06447648
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.40.5_n1_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7592499 0.10220627 Fold1
## 2 0.7569691 0.09122316 Fold2
## 3 0.7404967 0.00000000 Fold3
ad_tda_kde_5.40.5_n1_nb_fit_re<-Adult_TDA_KDE_5.40.5_n1_NbFit0$resample[1]
summary(Adult_TDA_KDE_5.40.5_n1_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Predict outcome using Adult_TDA_KDE_5.40.5_n1_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.40.5_n1_NbFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.40.5_n1_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.40.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2206
## >50K 0 146
##
## Accuracy : 0.7742
## 95% CI : (0.7657, 0.7824)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.000264
##
## Kappa : 0.0913
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.06207
## Pos Pred Value : 0.77073
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98505
## Balanced Accuracy : 0.53104
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n1_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2206
## >50K 0 146
##
## Accuracy : 0.7742
## 95% CI : (0.7657, 0.7824)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.000264
##
## Kappa : 0.0913
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Sensitivity : 1.00000
## Specificity : 0.06207
## Pos Pred Value : 0.77073
## Neg Pred Value : 1.00000
## Prevalence : 0.75921
## Detection Rate : 0.75921
## Detection Prevalence : 0.98505
## Balanced Accuracy : 0.53104
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n1_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7741605242 0.0913174168 0.7657362290 0.7824210465 0.7592137592
## AccuracyPValue McnemarPValue
## 0.0002639809 0.0000000000
# Store the overall test accuracy (first element of $overall)
ad_tda_kde_5.40.5_n1_nb_cf0_ov_acc<-ad_tda_kde_5.40.5_n1_nb_cf0$overall[1]
# Print the per-class statistics. The element is named `byClass`; the earlier
# `$byClas1` typo matched nothing and printed NULL, so that stale output line
# is dropped and the chunk needs a re-run.
ad_tda_kde_5.40.5_n1_nb_cf0$byClass
# Elements 5:7 of byClass are Precision, Recall and F1
ad_tda_kde_5.40.5_n1_nb_cf0_pre_rec_f1<-ad_tda_kde_5.40.5_n1_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes (NB) vs. tda-assisted naive Bayes (NB) classifiers
### 3-fold diff
diff_tda_kde_5.40.5_nb_n1_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.40.5_n1_nb_fit_re)
diff_tda_kde_5.40.5_nb_n1_3_fold
## Accuracy
## 1 0.015398014
## 2 0.008495513
## 3 0.019176893
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n1_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n1_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n1_3_fold_odds.left<-bst_tda_kde_5.40.5_nb.n1_3_fold$probLeft/bst_tda_kde_5.40.5_nb.n1_3_fold$probRight
bst_tda_kde_5.40.5_nb.n1_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_nb.n1_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n1_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.3014667
##
## $winRight
## [1] 0.6985333
# Bayesian Correlated Test
bct_tda_kde_5.40.5_nb.n1_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n1_3_fold
## $left
## [1] 0.01063927
##
## $rope
## [1] 0.1648335
##
## $right
## [1] 0.8245272
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_nb_n1_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n1_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold))
#bf_tda_kde_5.40.5_nb.n1_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nb_n1_3_fold)
## t = 4.5911, df = 2, p-value = 0.04431
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.00090203 0.02781158
## sample estimates:
## mean of x
## 0.01435681
### Test set diff
diff_tda_kde_5.40.5_nb.n1_test<-(nb_cf_ov_acc - ad_tda_kde_5.40.5_n1_nb_cf0_ov_acc)
diff_tda_kde_5.40.5_nb.n1_test
## Accuracy
## -0.001330876
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n1_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb.n1_test),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n1_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 1
##
## $probRight
## [1] 0
# Odds Left Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n1_test_odds.left<-bst_tda_kde_5.40.5_nb.n1_test$probLeft/bst_tda_kde_5.40.5_nb.n1_test$probRight
bst_tda_kde_5.40.5_nb.n1_test_odds.left
## [1] NaN
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_nb.n1_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb.n1_test),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n1_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 1
##
## $winRight
## [1] 0
# Bayesian Correlated Test
# Arguments after the data: 0.1 (presumably the correlation/rho argument —
# confirm against the function definition), then -0.01 and 0.01, the same
# bounds passed to the sign and signed-rank tests.
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left/rope/right — presumably the test needs more than one
# observation; confirm whether this call should be kept.
bct_tda_kde_5.40.5_nb.n1_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb.n1_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n1_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n1_test)))
#BayesFactor
#bf_tda_kde_5.40.5_nb.n1_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n1_test)) #bf_tda_kde_5.40.5_nb.n1_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n1_test))
##Node2
# Fit the Node2 naive Bayes model for the KDE-filter section.
# The data argument previously named tda.m_adult_5.40.5.n2.vec, which follows
# the PCA-section naming (tda.m_adult_*); the KDE Node1 fit uses
# tda.m_kde_adult_5.40.5.n1.vec, so the matching KDE Node2 vector is used here.
# NOTE(review): confirm tda.m_kde_adult_5.40.5.n2.vec is built upstream. The
# recorded output that follows was produced with the old data and needs a
# re-run.
Adult_TDA_KDE_5.40.5_n2_NbFit0 <- train(as.factor(adult_df1) ~ ., data = tda.m_kde_adult_5.40.5.n2.vec,
method = 'nb',
trControl = fitControl,
metric='Accuracy')
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Other.relative, V8.Own.child, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Ecuador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Trinadad.Tobago, V14.Vietnam
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V6.Married.AF.spouse, V6.Married.spouse.absent, V6.Separated, V6.Widowed, V7.Armed.Forces, V7.Priv.house.serv, V8.Unmarried, V8.Wife, V10.Female, V10.Male, V14.Cambodia, V14.Dominican.Republic, V14.Ecuador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Ireland, V14.Laos, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
Adult_TDA_KDE_5.40.5_n2_NbFit0
## Naive Bayes
##
## 10276 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 6851, 6850, 6851
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.6108408 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
Adult_TDA_KDE_5.40.5_n2_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.6108029 0 Fold1
## 2 0.6109165 0 Fold2
## 3 0.6108029 0 Fold3
ad_tda_kde_5.40.5_n2_nb_fit_re<-Adult_TDA_KDE_5.40.5_n2_NbFit0$resample[1]
summary(Adult_TDA_KDE_5.40.5_n2_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Predict outcome using Adult_TDA_KDE_5.40.5_n2_NbFit0 from training data based on testing data
pred0 <- predict(Adult_TDA_KDE_5.40.5_n2_NbFit0, newdata= adult.one_hot_df4Test)
# Create confusion matrix to assess model fit/performance on test data
ad_tda_kde_5.40.5_n2_nb_cf0<-confusionMatrix(data=pred0, as.factor(adult.one_hot_df4Test$adult_df1))
ad_tda_kde_5.40.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n2_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 0 0
## >50K 7416 2352
##
## Accuracy : 0.2408
## 95% CI : (0.2323, 0.2494)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 1
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 0.0000
## Specificity : 1.0000
## Pos Pred Value : NaN
## Neg Pred Value : 0.2408
## Prevalence : 0.7592
## Detection Rate : 0.0000
## Detection Prevalence : 0.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
ad_tda_kde_5.40.5_n2_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.2407862 0.0000000 0.2323343 0.2493929 0.7592138
## AccuracyPValue McnemarPValue
## 1.0000000 0.0000000
ad_tda_kde_5.40.5_n2_nb_cf0_ov_acc<-ad_tda_kde_5.40.5_n2_nb_cf0$overall[1]
ad_tda_kde_5.40.5_n2_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 0.0000000 1.0000000 NaN
## Neg Pred Value Precision Recall
## 0.2407862 NA 0.0000000
## F1 Prevalence Detection Rate
## NA 0.7592138 0.0000000
## Detection Prevalence Balanced Accuracy
## 0.0000000 0.5000000
ad_tda_kde_5.40.5_n2_nb_cf0_pre_rec_f1<-ad_tda_kde_5.40.5_n2_nb_cf0$byClass[5:7]
###### Conduct initial Bayesian tests of non-tda-assisted naive Bayes (NB) vs. tda-assisted naive Bayes (NB) classifiers
### 3-fold diff
diff_tda_kde_5.40.5_nb_n2_3_fold<-(ad_nb_fit_re - ad_tda_kde_5.40.5_n2_nb_fit_re)
diff_tda_kde_5.40.5_nb_n2_3_fold
## Accuracy
## 1 0.1638450
## 2 0.1545481
## 3 0.1488707
## Bayesian Tests 3-fold diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n2_3_fold<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n2_3_fold
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0.75
# Odds Left Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n2_3_fold_odds.left<-bst_tda_kde_5.40.5_nb.n2_3_fold$probLeft/bst_tda_kde_5.40.5_nb.n2_3_fold$probRight
bst_tda_kde_5.40.5_nb.n2_3_fold_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_nb.n2_3_fold<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n2_3_fold
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.008433333
##
## $winRight
## [1] 0.9915667
# Bayesian Correlated Test
bct_tda_kde_5.40.5_nb.n2_3_fold<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n2_3_fold
## $left
## [1] 0.0004615979
##
## $rope
## [1] 0.0001351271
##
## $right
## [1] 0.9994033
# Rope Plot
plot(rope(diff_tda_kde_5.40.5_nb_n2_3_fold,c(-0.01,0.01)))

#BayesFactor
#bf_tda_kde_5.40.5_nb.n2_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold))
#bf_tda_kde_5.40.5_nb.n2_3_fold
#t_test
t.test(as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold))
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nb_n2_3_fold)
## t = 35.686, df = 2, p-value = 0.0007843
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.1369752 0.1745339
## sample estimates:
## mean of x
## 0.1557546
### Test set diff
diff_tda_kde_5.40.5_nb.n2_test<-(nb_cf_ov_acc - ad_tda_kde_5.40.5_n2_nb_cf0_ov_acc)
diff_tda_kde_5.40.5_nb.n2_test
## Accuracy
## 0.5320434
## Bayesian Tests Test set diff
# Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n2_test<-BayesianSignTest(as.matrix(diff_tda_kde_5.40.5_nb.n2_test),-0.01,0.01)
bst_tda_kde_5.40.5_nb.n2_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds Left Bayesian Sign Test
bst_tda_kde_5.40.5_nb.n2_test_odds.left<-bst_tda_kde_5.40.5_nb.n2_test$probLeft/bst_tda_kde_5.40.5_nb.n2_test$probRight
bst_tda_kde_5.40.5_nb.n2_test_odds.left
## [1] 0
# Bayesian Signed Rank Test
bsr_tda_kde_5.40.5_nb.n2_test<-BayesianSignedRank(as.matrix(diff_tda_kde_5.40.5_nb.n2_test),-0.01,0.01)
bsr_tda_kde_5.40.5_nb.n2_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.1570333
##
## $winRight
## [1] 0.8429667
# Bayesian Correlated Test
# Arguments after the data: 0.1 (presumably the correlation/rho argument —
# confirm against the function definition), then -0.01 and 0.01, the same
# bounds passed to the sign and signed-rank tests.
# NOTE(review): the input is a single test-set difference and the recorded
# result is NA for left/rope/right — presumably the test needs more than one
# observation; confirm whether this call should be kept.
bct_tda_kde_5.40.5_nb.n2_test<-correlatedBayesianTtest(as.matrix(diff_tda_kde_5.40.5_nb.n2_test),0.1,-0.01,0.01)
bct_tda_kde_5.40.5_nb.n2_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n2_test)))
#BayesFactor
#bf_tda_kde_5.40.5_nb.n2_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n2_test)) #bf_tda_kde_5.40.5_nb.n2_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n2_test))
##Node3
# Fit a caret naive Bayes ("nb") classifier on tda.m_adult_5.40.5.n3.vec:
# adult_df1 (coerced to a factor) is the outcome and every other column is a
# predictor. Resampling is taken from fitControl and candidate tuning settings
# are ranked by accuracy.
# NOTE(review): the warnings recorded below show the usekernel = FALSE
# candidate failing in every fold (zero-variance predictors), so only the
# kernel variant is actually scored.
Adult_TDA_KDE_5.40.5_n3_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n3.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Ecuador, V14.Holand.Netherlands, V14.Honduras, V14.Outlying.US.Guam.USVI.etc., V14.Trinadad.Tobago, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V14.Dominican.Republic, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Portugal
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.Preschool, V7.Armed.Forces, V7.Priv.house.serv, V14.Columbia, V14.Holand.Netherlands, V14.Honduras, V14.Hungary, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Scotland, V14.Thailand
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Cross-validated tuning summary for the node-3 naive Bayes fit
Adult_TDA_KDE_5.40.5_n3_NbFit0
## Naive Bayes
##
## 11563 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 7708, 7709, 7709
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.7917496 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold metrics of the selected tuning setting
Adult_TDA_KDE_5.40.5_n3_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.7916991 0 Fold1
## 2 0.7919045 0 Fold2
## 3 0.7916450 0 Fold3
# Keep the per-fold accuracies (first column of $resample) as a one-column
# data frame; the fold-wise difference tests further down read this object.
ad_tda_kde_5.40.5_n3_nb_fit_re <-
  Adult_TDA_KDE_5.40.5_n3_NbFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n3_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test set with the node-3 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n3_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the true test labels.
# NOTE(review): the recorded matrix shows only 20 of 9768 cases predicted as
# ' >50K' (specificity 0.0085), i.e. close to a majority-class predictor.
ad_tda_kde_5.40.5_n3_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2332
## >50K 0 20
##
## Accuracy : 0.7613
## 95% CI : (0.7527, 0.7697)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3228
##
## Kappa : 0.0129
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.008503
## Pos Pred Value : 0.760771
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.997952
## Balanced Accuracy : 0.504252
##
## 'Positive' Class : <=50K
##
# The confusion matrix is printed a second time, as in the original run
ad_tda_kde_5.40.5_n3_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2332
## >50K 0 20
##
## Accuracy : 0.7613
## 95% CI : (0.7527, 0.7697)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.3228
##
## Kappa : 0.0129
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.000000
## Specificity : 0.008503
## Pos Pred Value : 0.760771
## Neg Pred Value : 1.000000
## Prevalence : 0.759214
## Detection Rate : 0.759214
## Detection Prevalence : 0.997952
## Balanced Accuracy : 0.504252
##
## 'Positive' Class : <=50K
##
# Overall statistics; the first entry (Accuracy) is stored for the test-set
# comparison
ad_tda_kde_5.40.5_n3_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.76126126 0.01285513 0.75267868 0.76968780 0.75921376
## AccuracyPValue McnemarPValue
## 0.32281324 0.00000000
ad_tda_kde_5.40.5_n3_nb_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n3_nb_cf0$overall["Accuracy"]
# Per-class statistics; entries 5 to 7 are Precision, Recall and F1
ad_tda_kde_5.40.5_n3_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.000000000 0.008503401 0.760771440
## Neg Pred Value Precision Recall
## 1.000000000 0.760771440 1.000000000
## F1 Prevalence Detection Rate
## 0.864134234 0.759213759 0.759213759
## Detection Prevalence Balanced Accuracy
## 0.997952498 0.504251701
ad_tda_kde_5.40.5_n3_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n3_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Fold-by-fold accuracy gap: ad_nb_fit_re (presumably the non-TDA NB per-fold
# accuracies, defined earlier) minus the node-3 TDA-assisted NB accuracies.
diff_tda_kde_5.40.5_nb_n3_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.40.5_n3_nb_fit_re
diff_tda_kde_5.40.5_nb_n3_3_fold
## Accuracy
## 1 -0.01705120
## 2 -0.02643992
## 3 -0.03197145
## Bayesian Tests 3-fold diff
# Bayesian sign test; -0.01 and 0.01 are passed positionally
bst_tda_kde_5.40.5_nb.n3_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n3_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test; Inf when probRight is 0
bst_tda_kde_5.40.5_nb.n3_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nb.n3_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_nb.n3_3_fold[["probRight"]]
bst_tda_kde_5.40.5_nb.n3_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same positional bounds
bsr_tda_kde_5.40.5_nb.n3_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n3_3_fold
## $winLeft
## [1] 0.9642
##
## $winRope
## [1] 0.0358
##
## $winRight
## [1] 0
# Correlated Bayesian t-test (positional arguments 0.1, -0.01, 0.01)
bct_tda_kde_5.40.5_nb.n3_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n3_3_fold
## $left
## [1] 0.9526393
##
## $rope
## [1] 0.03743396
##
## $right
## [1] 0.009926781
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_nb_n3_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nb.n3_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold))
#bf_tda_kde_5.40.5_nb.n3_3_fold
# Frequentist one-sample t-test of the same differences (default mu = 0)
t.test(
  as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nb_n3_3_fold)
## t = -5.7762, df = 2, p-value = 0.02869
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.043891443 -0.006416936
## sample estimates:
## mean of x
## -0.02515419
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc (presumably the non-TDA NB test accuracy,
# defined earlier) minus the node-3 TDA-assisted NB test accuracy. This is a
# single value, so every test below is run on one observation.
diff_tda_kde_5.40.5_nb.n3_test <-
  nb_cf_ov_acc - ad_tda_kde_5.40.5_n3_nb_cf0_ov_acc
diff_tda_kde_5.40.5_nb.n3_test
## Accuracy
## 0.01156839
## Bayesian Tests Test set diff
# Bayesian sign test; -0.01 and 0.01 are passed positionally
bst_tda_kde_5.40.5_nb.n3_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb.n3_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n3_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (probLeft / probRight)
bst_tda_kde_5.40.5_nb.n3_test_odds.left <-
  bst_tda_kde_5.40.5_nb.n3_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nb.n3_test[["probRight"]]
bst_tda_kde_5.40.5_nb.n3_test_odds.left
## [1] 0
# Bayesian signed-rank test with the same positional bounds
bsr_tda_kde_5.40.5_nb.n3_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb.n3_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n3_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4631
##
## $winRight
## [1] 0.5369
# Correlated Bayesian t-test.
# NOTE(review): the recorded output for this call is NA in all three entries;
# presumably a single difference leaves the variance undefined -- confirm
# against correlatedBayesianTtest.
bct_tda_kde_5.40.5_nb.n3_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb.n3_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n3_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n3_test))
#BayesFactor
#bf_tda_kde_5.40.5_nb.n3_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n3_test))
#bf_tda_kde_5.40.5_nb.n3_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n3_test))
##Node4
# Fit a caret naive Bayes ("nb") classifier on tda.m_adult_5.40.5.n4.vec:
# adult_df1 (coerced to a factor) is the outcome and every other column is a
# predictor. Resampling is taken from fitControl and candidate tuning settings
# are ranked by accuracy.
# NOTE(review): the warnings recorded below show the usekernel = FALSE
# candidate failing in every fold (zero-variance predictors), so only the
# kernel variant is actually scored.
Adult_TDA_KDE_5.40.5_n4_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n4.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Self.emp.inc, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.France, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V6.Married.AF.spouse, V7.Armed.Forces, V7.Priv.house.serv, V8.Husband, V14.Cambodia, V14.China, V14.Columbia, V14.Ecuador, V14.El.Salvador, V14.Guatemala, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.Iran, V14.Italy, V14.Jamaica, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Scotland, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.Never.worked, V2.Without.pay, V4.1st.4th, V4.Preschool, V7.Armed.Forces, V8.Husband, V14.Cambodia, V14.Columbia, V14.Cuba, V14.Ecuador, V14.El.Salvador, V14.Greece, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Iran, V14.Ireland, V14.Laos, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Poland, V14.Portugal, V14.Scotland, V14.Thailand, V14.Trinadad.Tobago
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Cross-validated tuning summary for the node-4 naive Bayes fit
Adult_TDA_KDE_5.40.5_n4_NbFit0
## Naive Bayes
##
## 14818 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 9879, 9879, 9878
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9574167 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold metrics of the selected tuning setting
Adult_TDA_KDE_5.40.5_n4_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9574813 0 Fold1
## 2 0.9574813 0 Fold2
## 3 0.9572874 0 Fold3
# Keep the per-fold accuracies (first column of $resample) as a one-column
# data frame; the fold-wise difference tests further down read this object.
ad_tda_kde_5.40.5_n4_nb_fit_re <-
  Adult_TDA_KDE_5.40.5_n4_NbFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n4_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test set with the node-4 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n4_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the true test labels.
# NOTE(review): the recorded matrix shows every test case predicted as
# ' <=50K' (Kappa 0), i.e. a majority-class predictor.
ad_tda_kde_5.40.5_n4_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# The confusion matrix is printed a second time, as in the original run
ad_tda_kde_5.40.5_n4_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics; the first entry (Accuracy) is stored for the test-set
# comparison
ad_tda_kde_5.40.5_n4_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_kde_5.40.5_n4_nb_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n4_nb_cf0$overall["Accuracy"]
# Per-class statistics; entries 5 to 7 are Precision, Recall and F1
ad_tda_kde_5.40.5_n4_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_kde_5.40.5_n4_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n4_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Fold-by-fold accuracy gap: ad_nb_fit_re (presumably the non-TDA NB per-fold
# accuracies, defined earlier) minus the node-4 TDA-assisted NB accuracies.
diff_tda_kde_5.40.5_nb_n4_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.40.5_n4_nb_fit_re
diff_tda_kde_5.40.5_nb_n4_3_fold
## Accuracy
## 1 -0.1828334
## 2 -0.1920167
## 3 -0.1976139
## Bayesian Tests 3-fold diff
# Bayesian sign test; -0.01 and 0.01 are passed positionally
bst_tda_kde_5.40.5_nb.n4_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n4_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test; Inf when probRight is 0
bst_tda_kde_5.40.5_nb.n4_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nb.n4_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_nb.n4_3_fold[["probRight"]]
bst_tda_kde_5.40.5_nb.n4_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same positional bounds
bsr_tda_kde_5.40.5_nb.n4_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n4_3_fold
## $winLeft
## [1] 0.9909333
##
## $winRope
## [1] 0.009066667
##
## $winRight
## [1] 0
# Correlated Bayesian t-test (positional arguments 0.1, -0.01, 0.01)
bct_tda_kde_5.40.5_nb.n4_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n4_3_fold
## $left
## [1] 0.9996219
##
## $rope
## [1] 7.148566e-05
##
## $right
## [1] 0.000306566
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_nb_n4_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nb.n4_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold))
#bf_tda_kde_5.40.5_nb.n4_3_fold
# Frequentist one-sample t-test of the same differences (default mu = 0)
t.test(
  as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nb_n4_3_fold)
## t = -44.29, df = 2, p-value = 0.0005094
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2093589 -0.1722837
## sample estimates:
## mean of x
## -0.1908213
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc (presumably the non-TDA NB test accuracy,
# defined earlier) minus the node-4 TDA-assisted NB test accuracy. This is a
# single value, so every test below is run on one observation.
diff_tda_kde_5.40.5_nb.n4_test <-
  nb_cf_ov_acc - ad_tda_kde_5.40.5_n4_nb_cf0_ov_acc
diff_tda_kde_5.40.5_nb.n4_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian sign test; -0.01 and 0.01 are passed positionally
bst_tda_kde_5.40.5_nb.n4_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb.n4_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n4_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (probLeft / probRight)
bst_tda_kde_5.40.5_nb.n4_test_odds.left <-
  bst_tda_kde_5.40.5_nb.n4_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nb.n4_test[["probRight"]]
bst_tda_kde_5.40.5_nb.n4_test_odds.left
## [1] 0
# Bayesian signed-rank test with the same positional bounds
bsr_tda_kde_5.40.5_nb.n4_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb.n4_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n4_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4611
##
## $winRight
## [1] 0.5389
# Correlated Bayesian t-test.
# NOTE(review): the recorded output for this call is NA in all three entries;
# presumably a single difference leaves the variance undefined -- confirm
# against correlatedBayesianTtest.
bct_tda_kde_5.40.5_nb.n4_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb.n4_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n4_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n4_test))
#BayesFactor
#bf_tda_kde_5.40.5_nb.n4_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n4_test))
#bf_tda_kde_5.40.5_nb.n4_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n4_test))
##Node5
# Fit a caret naive Bayes ("nb") classifier on tda.m_adult_5.40.5.n5.vec:
# adult_df1 (coerced to a factor) is the outcome and every other column is a
# predictor. Resampling is taken from fitControl and candidate tuning settings
# are ranked by accuracy.
# NOTE(review): the warnings recorded below show the usekernel = FALSE
# candidate failing in every fold (zero-variance predictors), so only the
# kernel variant is actually scored.
Adult_TDA_KDE_5.40.5_n5_NbFit0 <- train(
  as.factor(adult_df1) ~ .,
  data = tda.m_adult_5.40.5.n5.vec,
  method = "nb",
  trControl = fitControl,
  metric = "Accuracy"
)
## Warning: model fit failed for Fold1: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.HS.grad, V4.Masters, V4.Preschool, V4.Prof.school, V4.Some.college, V5, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Married.spouse.absent, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Not.in.family, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Black, V9.Other, V9.White, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold2: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Widowed, V7.., V7.Adm.clerical, V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Other.service, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Own.child, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.Vietnam, V14.Yugoslavia
## Warning: model fit failed for Fold3: usekernel=FALSE, fL=0, adjust=1 Error in NaiveBayes.default(x, y, usekernel = FALSE, fL = param$fL, ...) :
## Zero variances for at least one class in variables: V2.., V2.Federal.gov, V2.Local.gov, V2.Never.worked, V2.Private, V2.Self.emp.inc, V2.Self.emp.not.inc, V2.State.gov, V2.Without.pay, V4.10th, V4.11th, V4.12th, V4.1st.4th, V4.5th.6th, V4.7th.8th, V4.9th, V4.Assoc.acdm, V4.Assoc.voc, V4.Bachelors, V4.Doctorate, V4.Masters, V4.Preschool, V4.Prof.school, V6.Divorced, V6.Married.AF.spouse, V6.Married.civ.spouse, V6.Separated, V6.Widowed, V7.., V7.Armed.Forces, V7.Craft.repair, V7.Exec.managerial, V7.Farming.fishing, V7.Handlers.cleaners, V7.Machine.op.inspct, V7.Priv.house.serv, V7.Prof.specialty, V7.Protective.serv, V7.Sales, V7.Tech.support, V7.Transport.moving, V8.Husband, V8.Other.relative, V8.Wife, V9.Amer.Indian.Eskimo, V9.Asian.Pac.Islander, V9.Other, V10.Female, V10.Male, V11, V12, V14.., V14.Cambodia, V14.Canada, V14.China, V14.Columbia, V14.Cuba, V14.Dominican.Republic, V14.Ecuador, V14.El.Salvador, V14.England, V14.France, V14.Germany, V14.Greece, V14.Guatemala, V14.Haiti, V14.Holand.Netherlands, V14.Honduras, V14.Hong, V14.Hungary, V14.India, V14.Iran, V14.Ireland, V14.Italy, V14.Jamaica, V14.Japan, V14.Laos, V14.Mexico, V14.Nicaragua, V14.Outlying.US.Guam.USVI.etc., V14.Peru, V14.Philippines, V14.Poland, V14.Portugal, V14.Puerto.Rico, V14.Scotland, V14.South, V14.Taiwan, V14.Thailand, V14.Trinadad.Tobago, V14.United.States, V14.Vietnam, V14.Yugoslavia
## Warning in nominalTrainWorkflow(x = x, y = y, wts = weights, info = trainInfo,
## : There were missing values in resampled performance measures.
## Warning in train.default(x, y, weights = w, ...): missing values found in
## aggregated results
# Cross-validated tuning summary for the node-5 naive Bayes fit
Adult_TDA_KDE_5.40.5_n5_NbFit0
## Naive Bayes
##
## 12081 samples
## 108 predictor
## 2 classes: ' <=50K', ' >50K'
##
## No pre-processing
## Resampling: Cross-Validated (3 fold)
## Summary of sample sizes: 8053, 8055, 8054
## Resampling results across tuning parameters:
##
## usekernel Accuracy Kappa
## FALSE NaN NaN
## TRUE 0.9996689 0
##
## Tuning parameter 'fL' was held constant at a value of 0
## Tuning
## parameter 'adjust' was held constant at a value of 1
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were fL = 0, usekernel = TRUE and adjust
## = 1.
# Per-fold metrics of the selected tuning setting
Adult_TDA_KDE_5.40.5_n5_NbFit0$resample
## Accuracy Kappa Resample
## 1 0.9995035 0 Fold1
## 2 0.9997516 0 Fold2
## 3 0.9997517 0 Fold3
# Keep the per-fold accuracies (first column of $resample) as a one-column
# data frame; the fold-wise difference tests further down read this object.
ad_tda_kde_5.40.5_n5_nb_fit_re <-
  Adult_TDA_KDE_5.40.5_n5_NbFit0$resample["Accuracy"]
summary(Adult_TDA_KDE_5.40.5_n5_NbFit0)
## Length Class Mode
## apriori 2 table numeric
## tables 108 -none- list
## levels 2 -none- character
## call 6 -none- call
## x 108 data.frame list
## usekernel 1 -none- logical
## varnames 108 -none- character
## xNames 108 -none- character
## problemType 1 -none- character
## tuneValue 3 data.frame list
## obsLevels 2 -none- character
## param 0 -none- list
# Score the held-out test set with the node-5 model
pred0 <- predict(
  Adult_TDA_KDE_5.40.5_n5_NbFit0,
  newdata = adult.one_hot_df4Test
)
# Confusion matrix of the predictions against the true test labels.
# NOTE(review): the recorded matrix shows every test case predicted as
# ' <=50K' (Kappa 0), i.e. a majority-class predictor.
ad_tda_kde_5.40.5_n5_nb_cf0 <- confusionMatrix(
  data = pred0,
  reference = as.factor(adult.one_hot_df4Test$adult_df1)
)
ad_tda_kde_5.40.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# The confusion matrix is printed a second time, as in the original run
ad_tda_kde_5.40.5_n5_nb_cf0
## Confusion Matrix and Statistics
##
## Reference
## Prediction <=50K >50K
## <=50K 7416 2352
## >50K 0 0
##
## Accuracy : 0.7592
## 95% CI : (0.7506, 0.7677)
## No Information Rate : 0.7592
## P-Value [Acc > NIR] : 0.5055
##
## Kappa : 0
##
## Mcnemar's Test P-Value : <2e-16
##
## Sensitivity : 1.0000
## Specificity : 0.0000
## Pos Pred Value : 0.7592
## Neg Pred Value : NaN
## Prevalence : 0.7592
## Detection Rate : 0.7592
## Detection Prevalence : 1.0000
## Balanced Accuracy : 0.5000
##
## 'Positive' Class : <=50K
##
# Overall statistics; the first entry (Accuracy) is stored for the test-set
# comparison
ad_tda_kde_5.40.5_n5_nb_cf0$overall
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 0.7592138 0.0000000 0.7506071 0.7676657 0.7592138
## AccuracyPValue McnemarPValue
## 0.5055358 0.0000000
ad_tda_kde_5.40.5_n5_nb_cf0_ov_acc <-
  ad_tda_kde_5.40.5_n5_nb_cf0$overall["Accuracy"]
# Per-class statistics; entries 5 to 7 are Precision, Recall and F1
ad_tda_kde_5.40.5_n5_nb_cf0$byClass
## Sensitivity Specificity Pos Pred Value
## 1.0000000 0.0000000 0.7592138
## Neg Pred Value Precision Recall
## NaN 0.7592138 1.0000000
## F1 Prevalence Detection Rate
## 0.8631285 0.7592138 0.7592138
## Detection Prevalence Balanced Accuracy
## 1.0000000 0.5000000
ad_tda_kde_5.40.5_n5_nb_cf0_pre_rec_f1 <-
  ad_tda_kde_5.40.5_n5_nb_cf0$byClass[c("Precision", "Recall", "F1")]
###### Conduct initial Bayesian tests of non-tda-assisted NB vs. tda-assisted NB classifiers
### 3-fold diff
# Fold-by-fold accuracy gap: ad_nb_fit_re (presumably the non-TDA NB per-fold
# accuracies, defined earlier) minus the node-5 TDA-assisted NB accuracies.
diff_tda_kde_5.40.5_nb_n5_3_fold <-
  ad_nb_fit_re - ad_tda_kde_5.40.5_n5_nb_fit_re
diff_tda_kde_5.40.5_nb_n5_3_fold
## Accuracy
## 1 -0.2248556
## 2 -0.2342870
## 3 -0.2400781
## Bayesian Tests 3-fold diff
# Bayesian sign test; -0.01 and 0.01 are passed positionally
bst_tda_kde_5.40.5_nb.n5_3_fold <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n5_3_fold
## $probLeft
## [1] 0.75
##
## $probRope
## [1] 0.25
##
## $probRight
## [1] 0
# Odds of "left" versus "right" from the sign test; Inf when probRight is 0
bst_tda_kde_5.40.5_nb.n5_3_fold_odds.left <-
  bst_tda_kde_5.40.5_nb.n5_3_fold[["probLeft"]] /
  bst_tda_kde_5.40.5_nb.n5_3_fold[["probRight"]]
bst_tda_kde_5.40.5_nb.n5_3_fold_odds.left
## [1] Inf
# Bayesian signed-rank test with the same positional bounds
bsr_tda_kde_5.40.5_nb.n5_3_fold <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n5_3_fold
## $winLeft
## [1] 0.9906333
##
## $winRope
## [1] 0.009366667
##
## $winRight
## [1] 0
# Correlated Bayesian t-test (positional arguments 0.1, -0.01, 0.01)
bct_tda_kde_5.40.5_nb.n5_3_fold <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n5_3_fold
## $left
## [1] 0.9997366
##
## $rope
## [1] 4.153832e-05
##
## $right
## [1] 0.0002218888
# ROPE plot of the fold-wise differences over the interval [-0.01, 0.01]
plot(
  rope(diff_tda_kde_5.40.5_nb_n5_3_fold, c(-0.01, 0.01))
)

#BayesFactor
#bf_tda_kde_5.40.5_nb.n5_3_fold = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold))
#bf_tda_kde_5.40.5_nb.n5_3_fold
# Frequentist one-sample t-test of the same differences (default mu = 0)
t.test(
  as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold)
)
##
## One Sample t-test
##
## data: as.matrix(diff_tda_kde_5.40.5_nb_n5_3_fold)
## t = -52.541, df = 2, p-value = 0.0003621
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## -0.2521603 -0.2139868
## sample estimates:
## mean of x
## -0.2330736
### Test set diff
# Test-set accuracy gap: nb_cf_ov_acc (presumably the non-TDA NB test accuracy,
# defined earlier) minus the node-5 TDA-assisted NB test accuracy. This is a
# single value, so every test below is run on one observation.
diff_tda_kde_5.40.5_nb.n5_test <-
  nb_cf_ov_acc - ad_tda_kde_5.40.5_n5_nb_cf0_ov_acc
diff_tda_kde_5.40.5_nb.n5_test
## Accuracy
## 0.01361589
## Bayesian Tests Test set diff
# Bayesian sign test; -0.01 and 0.01 are passed positionally
bst_tda_kde_5.40.5_nb.n5_test <- BayesianSignTest(
  as.matrix(diff_tda_kde_5.40.5_nb.n5_test),
  -0.01,
  0.01
)
bst_tda_kde_5.40.5_nb.n5_test
## $probLeft
## [1] 0
##
## $probRope
## [1] 0.5
##
## $probRight
## [1] 0.5
# Odds of "left" versus "right" from the sign test (probLeft / probRight)
bst_tda_kde_5.40.5_nb.n5_test_odds.left <-
  bst_tda_kde_5.40.5_nb.n5_test[["probLeft"]] /
  bst_tda_kde_5.40.5_nb.n5_test[["probRight"]]
bst_tda_kde_5.40.5_nb.n5_test_odds.left
## [1] 0
# Bayesian signed-rank test with the same positional bounds
bsr_tda_kde_5.40.5_nb.n5_test <- BayesianSignedRank(
  as.matrix(diff_tda_kde_5.40.5_nb.n5_test),
  -0.01,
  0.01
)
bsr_tda_kde_5.40.5_nb.n5_test
## $winLeft
## [1] 0
##
## $winRope
## [1] 0.4555333
##
## $winRight
## [1] 0.5444667
# Correlated Bayesian t-test.
# NOTE(review): the recorded output for this call is NA in all three entries;
# presumably a single difference leaves the variance undefined -- confirm
# against correlatedBayesianTtest.
bct_tda_kde_5.40.5_nb.n5_test <- correlatedBayesianTtest(
  as.matrix(diff_tda_kde_5.40.5_nb.n5_test),
  0.1,
  -0.01,
  0.01
)
bct_tda_kde_5.40.5_nb.n5_test
## $left
## [1] NA
##
## $rope
## [1] NA
##
## $right
## [1] NA
# Rope Plot
#plot(rope(diff_tda_kde_5.40.5_nb.n5_test))
#BayesFactor
#bf_tda_kde_5.40.5_nb.n5_test = ttestBF(x = as.matrix(diff_tda_kde_5.40.5_nb.n5_test))
#bf_tda_kde_5.40.5_nb.n5_test
#t_test
#t.test(as.matrix(diff_tda_kde_5.40.5_nb.n5_test))